def __init__(self, num_classes: int = 1000) -> None:
    super(QuantizationAlexNet, self).__init__()
    self.features = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Conv2d(64, 192, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Conv2d(192, 384, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(384, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
    )
    self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
    self.classifier = nn.Sequential(
        nn.Dropout(),
        nn.Linear(256 * 6 * 6, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Linear(4096, num_classes),
    )
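# Illustrative sketch (assumption, not part of the source): a typical forward pass
# matching the constructor above flattens the pooled 256 x 6 x 6 features before
# the classifier. Tensor.flatten is available in both torch and oneflow.
def forward(self, x):
    x = self.features(x)
    x = self.avgpool(x)
    x = x.flatten(1)  # (N, 256 * 6 * 6)
    return self.classifier(x)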
def __init__(
    self,
    n_heads,
    d_model,
    dropout_rate=0.0,
    skip_term_b=False,
    share_qvk_proj=False,
):
    super(MultiHeadedSelfAttentionWithRelPos, self).__init__(
        n_heads, d_model, dropout_rate, share_qvk_proj
    )
    self.d_model = d_model
    self.share_qvk_proj = share_qvk_proj
    self.skip_term_b = skip_term_b
    self.nheads = n_heads
    self.d_k = d_model // n_heads
    self.qvk_proj = nn.Linear(
        d_model, d_model if self.share_qvk_proj else d_model * 3
    )
    self.pos_proj = nn.Linear(d_model, d_model, bias=False)
    self.posu = nn.Parameter(flow.Tensor(1, 1, n_heads, self.d_k))
    self.posv = nn.Parameter(flow.Tensor(1, 1, n_heads, self.d_k))
def __init__(self):
    super(LeNet, self).__init__()
    self.conv1 = nn.Conv2d(3, 6, 5)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.fc1 = nn.Linear(16 * 5 * 5, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 10)
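# Illustrative sketch (assumption, not in the source): the classic LeNet forward
# pass for the constructor above, using functional ReLU and max-pooling
# (assumes F = flow.nn.functional or torch.nn.functional).
def forward(self, x):
    x = F.max_pool2d(F.relu(self.conv1(x)), 2)
    x = F.max_pool2d(F.relu(self.conv2(x)), 2)
    x = x.view(x.size(0), -1)  # flatten to (N, 16 * 5 * 5)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return self.fc3(x)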
def __init__(self, num_classes=10):
    super(AlexNet, self).__init__()
    self.features = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2),
        nn.Conv2d(64, 192, kernel_size=3, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2),
        nn.Conv2d(192, 384, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(384, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
    )
    self.fc_layers = nn.Sequential(
        nn.Dropout(0.6),
        nn.Linear(4096, 2048),
        nn.ReLU(inplace=True),
        nn.Dropout(0.6),
        nn.Linear(2048, 2048),
        nn.ReLU(inplace=True),
        nn.Linear(2048, num_classes),
    )
def __init__(
    self,
    in_channels=1,
    out_channels=32,
    input_dim=312,
    hidden_dim=32,
    output_dim=10,
):
    super(cnn1d_ser, self).__init__()
    self.classifier = nn.Sequential(
        nn.Conv1d(in_channels, out_channels, 5, stride=1, padding=2),
        nn.BatchNorm1d(out_channels),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Conv1d(out_channels, out_channels, 5, stride=1, padding=2),
        nn.BatchNorm1d(out_channels),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Flatten(),
        nn.Linear(input_dim * out_channels, hidden_dim),
        nn.BatchNorm1d(hidden_dim),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(hidden_dim, output_dim),
    )
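# Illustrative sketch (assumption): since every layer lives in one nn.Sequential,
# the forward pass reduces to a single call; the input is expected as
# (batch, in_channels, input_dim) so nn.Flatten yields input_dim * out_channels features.
def forward(self, x):
    return self.classifier(x)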
def __init__(
    self,
    max_position_embeddings,
    hidden_size,
    nheads,
    dropout=0,
    position_embedding_type="absolute",
    is_decoder=False,
):
    super(BertSelfAttention, self).__init__()
    if hidden_size % nheads != 0:
        raise ValueError(
            f"The hidden size ({hidden_size}) is not a multiple of the number of attention "
            f"heads ({nheads})"
        )
    self.num_attention_heads = nheads
    self.attention_head_size = int(hidden_size / nheads)
    self.all_head_size = self.num_attention_heads * self.attention_head_size

    self.query = nn.Linear(hidden_size, self.all_head_size)
    self.key = nn.Linear(hidden_size, self.all_head_size)
    self.value = nn.Linear(hidden_size, self.all_head_size)
    self.dropout = nn.Dropout(dropout)

    self.position_embedding_type = position_embedding_type
    if (
        self.position_embedding_type == "relative_key"
        or self.position_embedding_type == "relative_key_query"
    ):
        self.max_position_embeddings = max_position_embeddings
        self.distance_embedding = nn.Embedding(
            2 * max_position_embeddings - 1, self.attention_head_size
        )
    self.is_decoder = is_decoder
def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
    super().__init__()
    self.n_head = n_head
    self.d_k = d_k
    self.d_v = d_v

    self.w_qs = nn.Linear(d_model, n_head * d_k)
    self.w_ks = nn.Linear(d_model, n_head * d_k)
    self.w_vs = nn.Linear(d_model, n_head * d_v)
    nn.init.normal_(self.w_qs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k)))
    nn.init.normal_(self.w_ks.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k)))
    nn.init.normal_(self.w_vs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_v)))

    self.attention = ScaledDotProductAttention(
        temperature=np.power(d_k, 0.5), attn_dropout=dropout
    )
    self.layer_norm = nn.LayerNorm(d_model)

    self.fc = nn.Linear(n_head * d_v, d_model)
    nn.init.xavier_normal_(self.fc.weight)

    self.dropout = nn.Dropout(dropout)
def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(3, 6, 5)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.fc1 = nn.Linear(16 * 5 * 5, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 10)
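# Illustrative sketch (assumption, not in the source): the usual forward pass for
# this 32x32 CIFAR-style network, flattening to 16 * 5 * 5 features before the
# fully connected layers (assumes F = flow.nn.functional or torch.nn.functional).
def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = x.view(x.size(0), -1)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return self.fc3(x)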
def __init__(self, input_size, hidden_size, output_size):
    super().__init__()
    self.hidden_size = hidden_size
    self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
    self.i2o = nn.Linear(input_size + hidden_size, output_size)
    self.softmax = nn.LogSoftmax(dim=1)
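# Illustrative sketch (assumption): this constructor matches the classic
# character-level RNN cell, whose step concatenates the input with the previous
# hidden state; `flow.cat` is assumed (oneflow, consistent with flow.Tensor used
# above; torch.cat has the same signature).
def forward(self, input, hidden):
    combined = flow.cat((input, hidden), dim=1)
    hidden = self.i2h(combined)
    output = self.softmax(self.i2o(combined))
    return output, hidden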
def __init__(
    self, input_size=784, hidden_size1=128, hidden_size2=64, num_classes=10
):
    super(Net, self).__init__()
    self.l1 = nn.Linear(input_size, hidden_size1)
    self.relu1 = nn.ReLU()
    self.l2 = nn.Linear(hidden_size1, hidden_size2)
    self.relu2 = nn.ReLU()
    self.l3 = nn.Linear(hidden_size2, num_classes)
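# Illustrative sketch (assumption): a straightforward forward pass chaining the
# three linear layers with their ReLU activations; the final layer returns raw
# logits, as is typical when paired with a cross-entropy loss.
def forward(self, x):
    out = self.relu1(self.l1(x))
    out = self.relu2(self.l2(out))
    return self.l3(out)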
def __init__(self, input_dim, hidden_dim, output_dim, batch_size):
    super(lstm_ser, self).__init__()
    self.classifier = nn.Sequential(
        LSTM(input_dim, hidden_dim, batch_size),
        nn.Dropout(0.5),
        nn.Linear(hidden_dim, 32),
        nn.ReLU(),
        nn.Linear(32, output_dim),
    )
def __init__(self, d_model, d_ff, dropout, activation="relu"):
    super(PositionwiseFeedForward, self).__init__()
    self.activation = activation
    assert activation in ["relu", "gelu", "glu", "tanh", "swish"]
    self.w_1 = nn.Linear(d_model, d_ff * 2 if activation == "glu" else d_ff)
    self.w_2 = nn.Linear(d_ff, d_model)
    self.dropout = nn.Dropout(dropout)
def __init__(
    self,
    hidden_size: int,
    intermediate_size: int,
    hidden_dropout_prob: float = 0.1,
    hidden_act: str = "relu",
) -> None:
    super().__init__()
    self.hidden_act = hidden_act
    self.intermediate = nn.Linear(hidden_size, intermediate_size)
    self.output = nn.Linear(intermediate_size, hidden_size)
    self.dropout = nn.Dropout(hidden_dropout_prob)
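# Illustrative sketch (assumption): the constructor stores `hidden_act` only as a
# string, so a forward pass would need to resolve it to a callable; the ACT2FN
# lookup used elsewhere in this file is assumed here for that purpose.
def forward(self, hidden_states):
    hidden_states = self.intermediate(hidden_states)
    hidden_states = ACT2FN[self.hidden_act](hidden_states)
    hidden_states = self.output(hidden_states)
    return self.dropout(hidden_states)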
def __init__(self, in_dim, mlp_dim, out_dim, dropout_rate=0.1):
    super(MlpBlock, self).__init__()
    # init layers
    self.fc1 = nn.Linear(in_dim, mlp_dim)
    self.fc2 = nn.Linear(mlp_dim, out_dim)
    self.act = nn.GELU()
    if dropout_rate > 0.0:
        self.dropout1 = nn.Dropout(dropout_rate)
        self.dropout2 = nn.Dropout(dropout_rate)
    else:
        self.dropout1 = None
        self.dropout2 = None
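# Illustrative sketch (assumption): the optional dropouts are guarded by None
# checks, mirroring how the constructor above conditionally creates them.
def forward(self, x):
    x = self.act(self.fc1(x))
    if self.dropout1 is not None:
        x = self.dropout1(x)
    x = self.fc2(x)
    if self.dropout2 is not None:
        x = self.dropout2(x)
    return x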
def __init__(self, in_dim, heads=8, dropout_rate=0.1):
    super(SelfAttention, self).__init__()
    self.heads = heads
    self.head_dim = in_dim // heads
    self.scale = self.head_dim ** 0.5

    self.query = nn.Linear(in_dim, self.heads * self.head_dim)
    self.key = nn.Linear(in_dim, self.heads * self.head_dim)
    self.value = nn.Linear(in_dim, self.heads * self.head_dim)
    self.out = nn.Linear(self.heads * self.head_dim, in_dim)

    if dropout_rate > 0:
        self.dropout = nn.Dropout(dropout_rate)
    else:
        self.dropout = None
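# Illustrative sketch (assumption): a standard multi-head self-attention forward
# pass consistent with the projections above, scaling scores by sqrt(head_dim)
# and applying the optional dropout to the attention weights.
def forward(self, x):
    b, n, _ = x.shape
    q = self.query(x).reshape(b, n, self.heads, self.head_dim).permute(0, 2, 1, 3)
    k = self.key(x).reshape(b, n, self.heads, self.head_dim).permute(0, 2, 1, 3)
    v = self.value(x).reshape(b, n, self.heads, self.head_dim).permute(0, 2, 1, 3)
    attn = (q @ k.transpose(-2, -1)) / self.scale
    attn = attn.softmax(dim=-1)
    if self.dropout is not None:
        attn = self.dropout(attn)
    out = (attn @ v).permute(0, 2, 1, 3).reshape(b, n, -1)
    return self.out(out)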
def __init__(
    self,
    in_features,
    hidden_features=None,
    out_features=None,
    act_layer=nn.GELU,
    drop=0.0,
):
    super().__init__()
    out_features = out_features or in_features
    hidden_features = hidden_features or in_features
    self.fc1 = nn.Linear(in_features, hidden_features)
    self.act = act_layer()
    self.fc2 = nn.Linear(hidden_features, out_features)
    self.drop = nn.Dropout(drop)
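# Illustrative sketch (assumption): the conventional MLP-block forward pass,
# applying the shared dropout after each linear layer.
def forward(self, x):
    x = self.fc1(x)
    x = self.act(x)
    x = self.drop(x)
    x = self.fc2(x)
    x = self.drop(x)
    return x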
def __init__(self, num_speakers=2) -> None:
    super(simple_CNN, self).__init__()
    self.convs = nn.Sequential(
        nn.Conv1d(1, 16, 100, stride=10),
        nn.BatchNorm1d(16),
        nn.ReLU(),
        nn.Conv1d(16, 64, 21, stride=10),
        nn.BatchNorm1d(64),
        nn.ReLU(),
        nn.Conv1d(64, 64, 5, stride=5),
        nn.BatchNorm1d(64),
        nn.ReLU(),
    )
    self.linears = nn.Sequential(
        nn.Linear(1 * 6 * 64, 128),
        nn.Linear(128, num_speakers),
    )
def __init__(
    self,
    spatial_feature_size=7,
    dropout_ratio=0.8,
    num_classes=101,
    with_avg_pool=False,
    temporal_feature_size=1,
    in_channels=2048,
    init_std=0.01,
    fcn_testing=False,
):
    super(ClsHead, self).__init__()
    self.with_avg_pool = with_avg_pool
    self.dropout_ratio = dropout_ratio
    self.in_channels = in_channels
    self.temporal_feature_size = temporal_feature_size
    self.spatial_feature_size = spatial_feature_size
    self.init_std = init_std
    self.fcn_testing = fcn_testing
    self.num_classes = num_classes

    if self.dropout_ratio != 0:
        self.dropout = nn.Dropout(p=self.dropout_ratio)
    else:
        self.dropout = None
    if self.with_avg_pool:
        self.avg_pool = nn.AvgPool3d(
            (temporal_feature_size, spatial_feature_size, spatial_feature_size)
        )
    self.fc_cls = nn.Linear(in_channels, num_classes)
    self.new_cls = None
def __init__(
    self,
    input_sz,
    output_sz,
    d_model,
    nhead,
    num_encoder_layers,
    num_decoder_layers,
    dim_feedforward,
    dropout,
):
    super(TransformerModel, self).__init__()
    self.transformer = Transformer(
        d_model=d_model,
        nhead=nhead,
        num_encoder_layers=num_encoder_layers,
        num_decoder_layers=num_decoder_layers,
        dim_feedforward=dim_feedforward,
        dropout=dropout,
        batch_first=False,
    )
    self.softmax = nn.Softmax(dim=2)
    self.linear = nn.Linear(d_model, output_sz)
    self.pos_encoder = PositionalEncoding(d_model, dropout)
    self.pos_decoder = PositionalEncoding(d_model, dropout)
    self.src_embedding = Embeddings(input_sz, d_model)
    self.tgt_embedding = Embeddings(output_sz, d_model)
def __init__(
    self, n_heads, d_model, memory_dim, dropout_rate=0.0, share_vk_proj=False
):
    super(MultiHeadedCrossAttention, self).__init__(
        d_model, d_model, enable_output_proj=True, dropout=dropout_rate
    )
    self.d_model = d_model
    self.share_vk_proj = share_vk_proj
    self.nheads = n_heads
    self.d_k = d_model // n_heads

    self.q_proj = nn.Linear(d_model, d_model)
    self.vk_proj = nn.Linear(
        memory_dim, d_model if self.share_vk_proj else d_model * 2
    )
def __init__(
    self,
    vocab_size,
    seq_length,
    hidden_size,
    hidden_layers,
    atten_heads,
    intermediate_size,
    hidden_act,
    hidden_dropout_prob,
    attention_probs_dropout_prob,
    max_position_embeddings,
    type_vocab_size,
    initializer_range=0.02,
):
    super().__init__()
    self.bert = BertModel(
        vocab_size,
        seq_length,
        hidden_size,
        hidden_layers,
        atten_heads,
        intermediate_size,
        hidden_act,
        hidden_dropout_prob,
        attention_probs_dropout_prob,
        max_position_embeddings,
        type_vocab_size,
    )
    self.seq_length = seq_length
    self.hidden_size = hidden_size
    self.cls_squad = nn.Linear(hidden_size, 2)
    self.cls_squad.weight.data.normal_(mean=0.0, std=initializer_range)
    self.cls_squad.bias.data.fill_(0)
def __init__(self, cfg):
    super(DPN, self).__init__()
    in_planes, out_planes = cfg['in_planes'], cfg['out_planes']
    num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth']

    self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.last_planes = 64
    self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
    self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
    self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
    self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
    self.linear = nn.Linear(
        out_planes[3] + (num_blocks[3] + 1) * dense_depth[3], 10
    )
def __init__(self, num_classes: int = 5) -> None:
    super(PoseNet, self).__init__()
    self.conv2d_1a_3x3 = BasicConv2d(3, 32, kernel_size=3, stride=2)
    self.conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3)
    self.conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1)
    self.MaxPool_3a_3x3 = nn.MaxPool2d(3, stride=2)
    self.conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1)
    self.conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3)
    self.MaxPool_5a_3x3 = nn.MaxPool2d(kernel_size=3, stride=2)  # stem

    self.Mixed_5b = self._generate_inception_module(192, 320, 1, Mixed_5b)
    self.block35 = self._generate_inception_module(320, 320, 1, block35)
    self.conv_ls1 = BasicConv2d(320, 320, kernel_size=3, stride=2, padding=1)
    self.MaxPool_3x3_ls1 = nn.MaxPool2d(kernel_size=3, stride=2)

    self.Mixed_6a = self._generate_inception_module(320, 1088, 1, Mixed_6a)
    self.block17 = self._generate_inception_module(1088, 1088, 1, block17)
    self.conv_ls2 = BasicConv2d(1088, 1088, kernel_size=3, stride=2)

    self.Mixed_7a = self._generate_inception_module(1088, 2080, 1, Mixed_7a)
    self.block8 = self._generate_inception_module(2080, 2080, 1, block8)
    self.conv_ls3 = BasicConv2d(3488, 2080, kernel_size=1)

    self.Conv2d_7b_1x1 = BasicConv2d(2080, 1536, kernel_size=1)
    self.AvgPool_1a_8x8 = nn.AvgPool2d(kernel_size=[8, 8])
    self.dense = nn.Linear(1536, num_classes)
    self.relu = nn.ReLU(inplace=True)
def __init__(
    self,
    wide_vocab_size: int,
    deep_vocab_size: int,
    deep_embedding_vec_size: int = 16,
    num_deep_sparse_fields: int = 26,
    num_dense_fields: int = 13,
    hidden_size: int = 1024,
    hidden_units_num: int = 7,
    deep_dropout_rate: float = 0.5,
):
    super(LocalWideAndDeep, self).__init__()
    self.wide_embedding = Embedding(wide_vocab_size, 1)
    self.deep_embedding = Embedding(deep_vocab_size, deep_embedding_vec_size)
    deep_feature_size = (
        deep_embedding_vec_size * num_deep_sparse_fields + num_dense_fields
    )
    self.linear_layers = nn.Sequential(
        OrderedDict(
            [
                (
                    f"fc{i}",
                    Dense(
                        deep_feature_size if i == 0 else hidden_size,
                        hidden_size,
                        deep_dropout_rate,
                    ),
                )
                for i in range(hidden_units_num)
            ]
        )
    )
    self.deep_scores = nn.Linear(hidden_size, 1)
    self.sigmoid = nn.Sigmoid()
def __init__(
    self,
    word_emb_dim,
    vocab_size,
    dim_channel,
    kernel_wins,
    dropout_rate,
    num_class,
    max_seq_len,
    training=True,
):
    super(textCNN, self).__init__()
    self.embed = nn.Embedding(vocab_size, word_emb_dim)
    self.convs = nn.ModuleList(
        [nn.Conv2d(1, dim_channel, (w, word_emb_dim)) for w in kernel_wins]
    )
    self.maxpool = nn.ModuleList(
        [nn.MaxPool2d((max_seq_len - w + 1, 1), stride=1) for w in kernel_wins]
    )
    # Dropout layer
    self.dropout = nn.Dropout(dropout_rate)
    self.training = training
    # FC layer
    self.fc = nn.Linear(len(kernel_wins) * dim_channel, num_class)
def __init__(
    self,
    c_in,
    c_cond,
    c_h,
    c_out,
    kernel_size,
    n_conv_blocks,
    upsample,
    act,
    sn,
    dropout_rate,
):
    super(Decoder, self).__init__()
    self.n_conv_blocks = n_conv_blocks
    self.upsample = upsample
    self.act = get_act(act)
    f = lambda x: x
    self.in_conv_layer = f(nn.Conv1d(c_in, c_h, kernel_size=1))
    self.first_conv_layers = nn.ModuleList(
        [
            f(nn.Conv1d(c_h, c_h, kernel_size=kernel_size))
            for _ in range(n_conv_blocks)
        ]
    )
    self.second_conv_layers = nn.ModuleList(
        [
            f(nn.Conv1d(c_h, c_h * up, kernel_size=kernel_size))
            for _, up in zip(range(n_conv_blocks), self.upsample)
        ]
    )
    self.norm_layer = nn.InstanceNorm1d(c_h, affine=False)
    self.conv_affine_layers = nn.ModuleList(
        [f(nn.Linear(c_cond, c_h * 2)) for _ in range(n_conv_blocks * 2)]
    )
    self.out_conv_layer = f(nn.Conv1d(c_h, c_out, kernel_size=1))
    self.dropout_layer = nn.Dropout(p=dropout_rate)
def __init__(self, hidden_size, intermediate_size, activation):
    super(BertIntermediate, self).__init__()
    self.dense = nn.Linear(hidden_size, intermediate_size)
    if isinstance(activation, str):
        self.intermediate_act_fn = ACT2FN[activation]
    else:
        self.intermediate_act_fn = activation
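# Illustrative sketch (assumption, not part of the source): BERT's intermediate
# layer simply projects and then applies the resolved activation.
def forward(self, hidden_states):
    hidden_states = self.dense(hidden_states)
    return self.intermediate_act_fn(hidden_states)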
def __init__(self, params):
    super(RecurrentLanguageModel, self).__init__(params)
    self.model_type = "recurrent_lm"
    self.vocab_size = params["vocab_size"]
    self.share_embedding = params["share_embedding"]
    self.smoothing = params["smoothing"]
    self.num_layers = params["num_layers"]
    self.hidden_size = params["hidden_size"]

    self.embedding = nn.Embedding(params["vocab_size"], params["hidden_size"])
    self.rnn = nn.LSTM(
        input_size=params["hidden_size"],
        hidden_size=params["hidden_size"],
        num_layers=params["num_layers"],
        batch_first=True,
        dropout=params["dropout"],
        bidirectional=False,
    )
    self.output_project = nn.Linear(params["hidden_size"], params["vocab_size"])

    if self.share_embedding:
        assert self.embedding.weight.size() == self.output_project.weight.size()
        self.output_project.weight = self.embedding.weight

    self.crit = LabelSmoothingLoss(
        size=self.vocab_size, smoothing=self.smoothing, padding_idx=PAD
    )
def __init__(self, block=BasicBlock, num_classes=10):
    super(DLA, self).__init__()
    self.base = nn.Sequential(
        nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(16),
        nn.ReLU(True),
    )
    self.layer1 = nn.Sequential(
        nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(16),
        nn.ReLU(True),
    )
    self.layer2 = nn.Sequential(
        nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(32),
        nn.ReLU(True),
    )
    self.layer3 = Tree(block, 32, 64, level=1, stride=1)
    self.layer4 = Tree(block, 64, 128, level=2, stride=2)
    self.layer5 = Tree(block, 128, 256, level=2, stride=2)
    self.layer6 = Tree(block, 256, 512, level=1, stride=2)
    self.linear = nn.Linear(512, num_classes)
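# Illustrative sketch (assumption): a forward pass for the CIFAR-sized DLA above,
# average-pooling the final 4x4 feature map before the classifier (assumes
# F = flow.nn.functional or torch.nn.functional).
def forward(self, x):
    out = self.base(x)
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)
    out = self.layer5(out)
    out = self.layer6(out)
    out = F.avg_pool2d(out, 4)
    out = out.view(out.size(0), -1)
    return self.linear(out)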
def __init__(
    self,
    vocab_size,
    d_model=256,
    n_heads=4,
    d_ff=2048,
    memory_dim=256,
    n_blocks=6,
    pos_dropout=0.0,
    slf_attn_dropout=0.0,
    src_attn_dropout=0.0,
    ffn_dropout=0.0,
    residual_dropout=0.1,
    activation="relu",
    normalize_before=True,
    concat_after=False,
    share_embedding=False,
):
    super(TransformerDecoder, self).__init__()
    self.decoder_type = "transformer"
    self.normalize_before = normalize_before
    self.relative_positional = False
    self.d_model = d_model

    self.embedding = nn.Embedding(vocab_size, d_model)
    self.pos_emb = PositionalEncoding(d_model, pos_dropout)

    self.blocks = nn.ModuleList(
        [
            TransformerDecoderLayer(
                n_heads,
                d_model,
                d_ff,
                memory_dim,
                slf_attn_dropout,
                src_attn_dropout,
                ffn_dropout,
                residual_dropout,
                normalize_before=normalize_before,
                concat_after=concat_after,
                relative_positional=False,
                activation=activation,
            )
            for _ in range(n_blocks)
        ]
    )

    if self.normalize_before:
        self.after_norm = nn.LayerNorm(d_model)

    self.output_layer = nn.Linear(d_model, vocab_size)

    if share_embedding:
        assert self.embedding.weight.size() == self.output_layer.weight.size()
        self.output_layer.weight = self.embedding.weight
        logger.info("Tie the weights between the embedding and output layer.")