def __init__(self, seq_length: int, output_seq_length: int, n_time_series: int, d_model=128, output_dim=1,
             n_layers_encoder=6, forward_dim=2048, dropout=0.1, use_mask=False, meta_data=None, n_heads=8):
    """Uses a number of encoder layers with a simple linear decoder layer."""
    super().__init__()
    self.dense_shape = torch.nn.Linear(n_time_series, d_model)
    self.pe = SimplePositionalEncoding(d_model)
    # Use the n_heads argument rather than a hard-coded head count.
    encoder_layer = TransformerEncoderLayer(d_model, n_heads, forward_dim, dropout)
    encoder_norm = LayerNorm(d_model)
    self.transformer_enc = TransformerEncoder(encoder_layer, n_layers_encoder, encoder_norm)
    self.output_dim_layer = torch.nn.Linear(d_model, output_dim)
    self.output_seq_length = output_seq_length
    self.out_length_lay = torch.nn.Linear(seq_length, output_seq_length)
    self.mask = generate_square_subsequent_mask(seq_length)
    self.mask_it = use_mask
    if meta_data:
        self.meta_merger = MergingModel(meta_data["method"], meta_data["params"])
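# The constructor above calls generate_square_subsequent_mask(seq_length). A minimal
# sketch of that standard helper is shown below, assuming the usual additive-mask
# convention of torch.nn.TransformerEncoder (float mask, -inf above the diagonal).
# This is an illustration, not necessarily this module's exact helper.
import torch

def generate_square_subsequent_mask(sz: int) -> torch.Tensor:
    # -inf above the diagonal blocks attention to future positions; 0.0 elsewhere,
    # so position i may only attend to positions <= i.
    return torch.triu(torch.full((sz, sz), float("-inf")), diagonal=1)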
def __init__(self, vocab_size: int, d_model: int, n_head: int, n_layers: int, dim_ff: int,
             dropout: float, pad_id: int):
    super(TransformerSpaceCorrector, self).__init__()
    self.vocab_size = vocab_size
    self.label_size = 2 + 1
    self.d_model = d_model
    self.n_head = n_head
    self.n_layers = n_layers
    self.dim_ff = dim_ff
    self.dropout = dropout
    self.pad_id = pad_id

    self.embedding = nn.Embedding(vocab_size, d_model)
    self.label_embedding = nn.Embedding(self.label_size, d_model)
    self.position_embedding = PositionalEncoding(d_model, dropout)
    enc_layer = TransformerEncoderLayer(d_model, n_head, dim_ff)
    # enc_norm = LayerNorm(d_model)
    # self.encoder = TransformerEncoder(enc_layer, n_layers, enc_norm)
    self.encoder = TransformerEncoder(enc_layer, n_layers)
    self.classifier = Classifier(d_model=d_model, class_num=2, d_ff=128, dropout=dropout)
def __init__(self, d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward,
             dropout, activation, src_vocab_size, tgt_vocab_size):
    super(TransformerModel, self).__init__()
    self.pos_encoder = PositionalEncoding(d_model=d_model, dropout=0.1)  # , max_len=100)

    encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, activation)
    encoder_norm = LayerNorm(d_model)
    self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

    decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout, activation)
    decoder_norm = LayerNorm(d_model)
    self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm)

    self.d_model = d_model
    self.nhead = nhead
    self.linear = Linear(d_model, tgt_vocab_size)
    self.transformer = Transformer(d_model=d_model, nhead=nhead,
                                   num_encoder_layers=num_encoder_layers,
                                   num_decoder_layers=num_decoder_layers,
                                   dim_feedforward=dim_feedforward,
                                   dropout=dropout, activation=activation)
    self.encoder_embedding = nn.Embedding(src_vocab_size, d_model)
    self.decoder_embedding = nn.Embedding(tgt_vocab_size, d_model)
    self._reset_parameters()
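# For context, a self-contained sketch (not this class's forward method) of the
# standard flow the constructor above wires up: embed source/target token ids,
# run them through nn.Transformer with a causal target mask, and project decoder
# states to target-vocabulary logits. Sizes are arbitrary; positional encoding is
# omitted for brevity.
import math
import torch
import torch.nn as nn

d_model, nhead, src_vocab, tgt_vocab = 512, 8, 1000, 1000
src_embed = nn.Embedding(src_vocab, d_model)
tgt_embed = nn.Embedding(tgt_vocab, d_model)
transformer = nn.Transformer(d_model=d_model, nhead=nhead)
proj = nn.Linear(d_model, tgt_vocab)

src = torch.randint(0, src_vocab, (10, 2))    # (src_len, batch)
tgt = torch.randint(0, tgt_vocab, (9, 2))     # (tgt_len, batch)
tgt_mask = transformer.generate_square_subsequent_mask(tgt.size(0))
out = transformer(src_embed(src) * math.sqrt(d_model),
                  tgt_embed(tgt) * math.sqrt(d_model),
                  tgt_mask=tgt_mask)
logits = proj(out)                            # (tgt_len, batch, tgt_vocab)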
def __init__(self, encoder_type, vocab_size, embed_dim, encoder_dim, output_dim, dropout, **kwargs):
    super().__init__()
    if encoder_type == "bert":
        self.embed = None
        self.encoder = BertModel.from_pretrained('bert-base-uncased')
        # for p in self.encoder.parameters():
        #     p.requires_grad = False
        # self.encoder.cuda()
    elif encoder_type == "transformer":
        self.embed = nn.Embedding(vocab_size, embed_dim)
        encoder_layer = TransformerEncoderLayer(encoder_dim, nhead=8)
        encoder_norm = LayerNorm(encoder_dim)
        self.encoder = TransformerEncoder(encoder_layer, 1, encoder_norm)
    elif encoder_type in ["lstm", "rnn", "gru"]:
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.encoder = RNN(encoder_type, embed_dim, output_dim,
                           bidirectional=kwargs.get('bidirectional', False), dropout=dropout)

    if encoder_dim != output_dim or kwargs.get('project', False):
        self.encoder_output_linear = nn.Linear(encoder_dim, output_dim)
        self.encoder_state_linear = nn.Linear(encoder_dim, output_dim)
    else:
        self.encoder_output_linear = nn.Sequential()
        self.encoder_state_linear = nn.Sequential()

    self.encoder_type = encoder_type
    self.dropout = nn.Dropout(dropout)
def __init__(self, seq_length: int, output_seq_length: int, n_time_series: int, d_model=128,
             output_dim=1, n_layers_encoder=6, use_mask=False, n_heads=8):
    """Uses a number of encoder layers with a simple linear decoder layer."""
    super().__init__()
    self.dense_shape = torch.nn.Linear(n_time_series, d_model)
    self.pe = SimplePositionalEncoding(d_model)
    # Use the n_heads argument rather than a hard-coded head count.
    encoder_layer = TransformerEncoderLayer(d_model, n_heads)
    encoder_norm = LayerNorm(d_model)
    self.transformer_enc = TransformerEncoder(encoder_layer, n_layers_encoder, encoder_norm)
    self.output_dim_layer = torch.nn.Linear(d_model, output_dim)
    self.output_seq_length = output_seq_length
    self.out_length_lay = torch.nn.Linear(seq_length, output_seq_length)
    self.mask = generate_square_subsequent_mask(seq_length)
    self.mask_it = use_mask
def __init__(self, vocab_size: int, word_dim: int, d_model: int, n_head: int, n_layers: int,
             dim_ff: int, dropout: float, pad_id: int, n_class: int):
    super(TransformerHierachiSeqTagger, self).__init__()
    self.vocab_size = vocab_size
    self.word_dim = word_dim
    self.d_model = d_model
    self.n_head = n_head
    self.n_layers = n_layers
    self.dim_ff = dim_ff
    self.dropout = dropout
    self.pad_id = pad_id

    self.embedding = nn.Embedding(vocab_size, word_dim, padding_idx=pad_id)
    self.word_linear = nn.Linear(word_dim, d_model, bias=False)
    self.position_embedding = PositionalEncoding(d_model, dropout)
    enc_layer = TransformerEncoderLayer(d_model, n_head, dim_ff)
    self.encoder = TransformerEncoder(enc_layer, n_layers)
    self.classifier = Classifier(d_model=d_model, class_num=n_class, d_ff=dim_ff, dropout=dropout)
def __init__(self, vocab: int, d_model: int, n_head: int, n_layers: int, dim_ff: int,
             dropout: float, pad_id: int):
    super(TransformerEncoder, self).__init__()
    self.d_model = d_model
    self.pad_id = pad_id
    self.embedding = nn.Embedding(vocab, d_model)
    self.position_embedding = PositionalEncoding(d_model, dropout)
    enc_layer = TransformerEncoderLayer(d_model, n_head, dim_ff)
    enc_norm = LayerNorm(d_model)
    # Enc: the stacked-encoder implementation, imported under an alias to avoid
    # clashing with this class's own TransformerEncoder name.
    self.encoder = Enc(enc_layer, n_layers, enc_norm)
def __init__(self, args):
    super(TransformerMIL, self).__init__()
    encoder_layer = TransformerEncoderLayer(d_model=args.feature_depth, nhead=8,
                                            dim_feedforward=2048, dropout=args.dropout,
                                            activation="relu")
    encoder_norm = LayerNorm(args.feature_depth)
    self.attention = TransformerEncoder(encoder_layer, args.ntrans, encoder_norm)
    # self.attention1 = MultiheadAttention(args.feature_depth, 8)
    self.attention2 = MultiheadAttention(args.feature_depth, 8)
    self.classifier = Sequential(Linear(args.feature_depth, 1), Sigmoid())
    self.mil = AttentionMILFeatures(args)
def __init__(self, num_tokens_per_channel, codebook_dim, upscale_factors, list_of_num_layers,
             n_head, d_model, dim_feedforward, num_tokens_bottleneck, dropout):
    super(AuxiliaryDecoder, self).__init__()
    assert len(list_of_num_layers) == len(upscale_factors)
    self.num_notes_per_voice = num_tokens_per_channel
    self.num_tokens_per_block = len(self.num_notes_per_voice)
    self.d_model = d_model
    self.codebook_dim = codebook_dim
    self.upscale_factors = upscale_factors

    # self.code_embedding = nn.Embedding(self.codebook_dim, self.d_model)
    self.linear = nn.Linear(self.codebook_dim, self.d_model)

    # TODO factorised positional embeddings
    positional_embedding_size = self.d_model
    self.positional_embeddings = nn.Parameter(
        torch.randn((1, num_tokens_bottleneck, positional_embedding_size)))
    self.upscale_embeddings = nn.ParameterList([
        nn.Parameter(torch.randn(upscale, self.d_model))
        for upscale in self.upscale_factors
    ])

    # self.code_embedding_dim = self.d_model - positional_embedding_size
    # TODO for now sum positional embedding
    self.code_embedding_dim = self.d_model - positional_embedding_size

    encoder_layer = TransformerEncoderLayer(
        d_model=self.d_model,
        nhead=n_head,
        dim_feedforward=dim_feedforward,
        dropout=dropout)
    # NOTE layer_norm is already contained in encoder_layers
    self.transformers = nn.ModuleList([
        TransformerEncoder(
            encoder_layer=encoder_layer,
            num_layers=num_layers,
        )
        for num_layers in list_of_num_layers
    ])
    self.pre_softmaxes = nn.ModuleList([
        nn.Linear(self.d_model, num_notes)
        for num_notes in num_tokens_per_channel
    ])
def __init__(self, src_vocab_size=128, tgt_vocab_size=128, embedding_dim=128, fcn_hidden_dim=128,
             num_heads=4, num_layers=2, dropout=0.2, src_to_tgt_vocab_conversion_matrix=None):
    super(PointerGeneratorTransformer, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.embedding_dim = embedding_dim
    self.src_to_tgt_vocab_conversion_matrix = src_to_tgt_vocab_conversion_matrix

    self.pos_encoder = PositionalEncoding(embedding_dim)

    # Source and target embeddings
    self.src_embed = Embedding(self.src_vocab_size, embedding_dim, padding_idx=2)
    self.tgt_embed = Embedding(self.tgt_vocab_size, embedding_dim, padding_idx=2)

    # Encoder layers
    self.encoder_layer = TransformerEncoderLayer(embedding_dim, num_heads, fcn_hidden_dim, dropout)
    self.encoder = TransformerEncoder(self.encoder_layer, num_layers)

    # Decoder layers
    self.decoder_layer = TransformerDecoderLayer(embedding_dim, num_heads, fcn_hidden_dim, dropout)
    self.decoder_final_layer = TransformerDecoderFinalLayer(embedding_dim, num_heads, fcn_hidden_dim, dropout)
    self.decoder = TransformerDecoder(self.decoder_layer, self.decoder_final_layer, num_layers)

    # Final linear layer + softmax for the probability distribution over the target vocabulary
    self.p_vocab = nn.Sequential(
        nn.Linear(self.embedding_dim, self.tgt_vocab_size),
        nn.Softmax(dim=-1))

    # p_gen: probability of generating (rather than copying) the output token
    self.p_gen = nn.Sequential(
        nn.Linear(self.embedding_dim * 3, 1),
        nn.Sigmoid())

    # Context vector
    self.c_t = None

    # Initialize masks
    self.src_mask = None
    self.tgt_mask = None
    self.mem_mask = None

    # Initialize weights of model
    self._reset_parameters()
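# Hedged sketch of the pointer-generator mixture that p_vocab and p_gen above are
# built for (See et al., 2017): the final distribution blends the generation
# distribution with a copy distribution derived from attention, gated by p_gen.
# Shapes and the copy distribution here are illustrative, not this class's forward pass.
import torch

batch, tgt_vocab_size = 2, 128
p_vocab = torch.softmax(torch.randn(batch, tgt_vocab_size), dim=-1)  # generation distribution
p_copy = torch.softmax(torch.randn(batch, tgt_vocab_size), dim=-1)   # attention mapped onto the target vocab
p_gen = torch.sigmoid(torch.randn(batch, 1))                         # per-step gating scalar in (0, 1)
p_final = p_gen * p_vocab + (1.0 - p_gen) * p_copy                   # each row still sums to 1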
def __init__(self, depth, ibn_type=None, final_layer='layer3', neck=512, nhead=1,
             num_encoder_layers=2, dim_feedforward=2048, dropout=0., pretrained=True):
    super(ResNet, self).__init__()
    self.depth = depth
    self.final_layer = final_layer
    self.neck = neck
    self.pretrained = pretrained

    if depth not in ResNet.__factory:
        raise KeyError("Unsupported depth: ", depth)
    if ibn_type is not None and depth == 152:
        raise KeyError("Unsupported IBN-Net depth: ", depth)

    if ibn_type is None:
        # Construct base (pretrained) resnet
        print('\nCreate ResNet model ResNet-%d.\n' % depth)
        self.base = ResNet.__factory[depth](pretrained=pretrained)
    else:
        # Construct base (pretrained) IBN-Net
        model_name = 'resnet%d_ibn_%s' % (depth, ibn_type)
        print('\nCreate IBN-Net model %s.\n' % model_name)
        self.base = torch.hub.load('XingangPan/IBN-Net', model_name, pretrained=pretrained)

    if depth < 50:
        out_planes = fea_dims_small[final_layer]
    else:
        out_planes = fea_dims[final_layer]

    if neck > 0:
        self.neck_conv = nn.Conv2d(out_planes, neck, kernel_size=3, padding=1)
        out_planes = neck

    self.encoder = None
    if num_encoder_layers > 0:
        encoder_layer = TransformerEncoderLayer(out_planes, nhead, dim_feedforward, dropout)
        encoder_norm = None
        self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

    self.num_features = out_planes
def __init__(self, num_layers, input_dim, num_tokens_per_channel, positional_embedding_size,
             d_model, dim_feedforward, n_head, num_tokens, dropout):
    raise NotImplementedError  # must use its own data_processor
    super(TeacherAbsolute, self).__init__()
    self.num_channels = len(num_tokens_per_channel)
    self.positional_embeddings = nn.Parameter(
        torch.randn((1, num_tokens, positional_embedding_size)))
    self.num_layers = num_layers
    self.linear_to_input_transformer = nn.Linear(
        input_dim, d_model - positional_embedding_size)
    encoder_layer = TransformerEncoderLayer(
        d_model=d_model,
        nhead=n_head,
        dim_feedforward=dim_feedforward,
        dropout=dropout)
    self.transformer = TransformerEncoder(
        encoder_layer=encoder_layer,
        num_layers=self.num_layers,
    )
    self.num_tokens_per_channel = num_tokens_per_channel
    self.pre_softmaxes = nn.ModuleList([
        nn.Linear(d_model, num_notes)
        for num_notes in num_tokens_per_channel
    ])
def build_transformer_model(src_vocab_size: int,
                            tgt_vocab_size: int,
                            rnn_size: int = RNN_SIZE,
                            num_head: int = 4,
                            num_layers: int = 3,
                            dim_ff: int = 1024,
                            dropout: float = DROPOUT) -> EncoderDecoder:
    """Build a transformer model based on the paper "Attention Is All You Need".

    Arguments:
        src_vocab_size: vocab size for the encoder
        tgt_vocab_size: vocab size for the decoder
        rnn_size: hidden size shared by the encoder/decoder (used as the transformer model dimension)
        num_head: the number of heads in the multi-headed attention
        num_layers: number of encoder/decoder layers
        dim_ff: the dimension of the feed-forward layer
        dropout: the dropout probability
    """
    # Build encoder
    encoder_layer = TransformerEncoderLayer(rnn_size, num_head, dim_ff, dropout)
    encoder_norm = LayerNorm(rnn_size)
    encoder = TransformerEncoder(encoder_layer, num_layers, encoder_norm)

    # Build decoder
    decoder_layer = TransformerDecoderLayer(rnn_size, num_head, dim_ff, dropout)
    decoder_norm = LayerNorm(rnn_size)
    decoder = TransformerDecoder(decoder_layer, num_layers, decoder_norm)

    # Build generator
    generator = Generator(rnn_size, tgt_vocab_size)

    return EncoderDecoder(encoder, decoder, generator, rnn_size, src_vocab_size, tgt_vocab_size)
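# Hedged usage sketch: how the builder above might be called. Assumes
# build_transformer_model and its EncoderDecoder return type are importable from
# this module; the hyper-parameter values below are illustrative only.
model = build_transformer_model(src_vocab_size=32000,
                                tgt_vocab_size=32000,
                                rnn_size=512,
                                num_head=8,
                                num_layers=6,
                                dim_ff=2048,
                                dropout=0.1)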
def __init__(self, vocab_size: int, d_model: int, n_head: int, n_layers: int, dim_ff: int,
             dropout: float, pad_id: int, n_class: int):
    super(TransformerSeqTagger, self).__init__()
    self.vocab_size = vocab_size
    self.d_model = d_model
    self.n_head = n_head
    self.n_layers = n_layers
    self.dim_ff = dim_ff
    self.dropout = dropout
    self.pad_id = pad_id

    self.embedding = nn.Embedding(vocab_size, d_model)
    self.position_embedding = PositionalEncoding(d_model, dropout)
    enc_layer = TransformerEncoderLayer(d_model, n_head, dim_ff)
    self.encoder = TransformerEncoder(enc_layer, n_layers)
    self.classifier = Classifier(d_model=d_model, class_num=n_class, d_ff=dim_ff, dropout=dropout)
def __init__(self, seq_length: int, output_seq_length: int, n_time_series: int, d_model=128,
             output_dim=1, n_layers_encoder=6, forward_dim=2048, dropout=0.1, use_mask=False,
             meta_data=None, final_act=None, squashed_embedding=False, n_heads=8):
    """Uses a number of encoder layers with a simple linear decoder layer.

    :param seq_length: The number of historical time-steps fed into the model in each forward pass.
    :type seq_length: int
    :param output_seq_length: The number of forecasted time-steps output by the model.
    :type output_seq_length: int
    :param n_time_series: The total number of time series present (targets + features)
    :type n_time_series: int
    :param d_model: The embedding dimension of the model, defaults to 128
    :type d_model: int, optional
    :param output_dim: The output dimension (should correspond to n_targets), defaults to 1
    :type output_dim: int, optional
    :param n_layers_encoder: The number of encoder layers, defaults to 6
    :type n_layers_encoder: int, optional
    :param forward_dim: The dimension of the feed-forward layer, defaults to 2048
    :type forward_dim: int, optional
    :param dropout: How much dropout to use, defaults to 0.1
    :type dropout: float, optional
    :param use_mask: Whether to use a subsequent-sequence mask during training, defaults to False
    :type use_mask: bool, optional
    :param meta_data: Configuration for merging static meta-data (keys "method" and "params"), defaults to None
    :type meta_data: dict, optional
    :param final_act: Name of a final activation function to apply, defaults to None
    :type final_act: str, optional
    :param squashed_embedding: Whether to squash the time dimension into a single 1-D embedding, defaults to False
    :type squashed_embedding: bool, optional
    :param n_heads: The number of attention heads, defaults to 8
    :type n_heads: int, optional
    """
    super().__init__()
    self.dense_shape = torch.nn.Linear(n_time_series, d_model)
    self.pe = SimplePositionalEncoding(d_model)
    # Use the n_heads argument rather than a hard-coded head count.
    encoder_layer = TransformerEncoderLayer(d_model, n_heads, forward_dim, dropout)
    encoder_norm = LayerNorm(d_model)
    self.transformer_enc = TransformerEncoder(encoder_layer, n_layers_encoder, encoder_norm)
    self.output_dim_layer = torch.nn.Linear(d_model, output_dim)
    self.output_seq_length = output_seq_length
    self.out_length_lay = torch.nn.Linear(seq_length, output_seq_length)
    self.mask = generate_square_subsequent_mask(seq_length)
    self.out_dim = output_dim
    self.mask_it = use_mask
    self.final_act = None
    self.squashed = None
    if final_act:
        self.final_act = activation_dict[final_act]
    if meta_data:
        self.meta_merger = MergingModel(meta_data["method"], meta_data["params"])
    if squashed_embedding:
        self.squashed = torch.nn.Linear(seq_length, 1)
        self.unsquashed = torch.nn.Linear(1, seq_length)
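# Shape-flow sketch (assumed, not this module's actual forward pass) for the
# encoder-only forecaster above: project the feature dimension to d_model, encode,
# map back to output_dim, then map the time axis from seq_length to
# output_seq_length. Positional encoding, masking, and meta-data merging are
# omitted for brevity.
import torch
import torch.nn as nn

batch, seq_length, n_time_series = 4, 30, 5
d_model, output_dim, output_seq_length = 128, 1, 10

dense_shape = nn.Linear(n_time_series, d_model)
encoder = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model, 8), num_layers=6)
output_dim_layer = nn.Linear(d_model, output_dim)
out_length_lay = nn.Linear(seq_length, output_seq_length)

x = torch.randn(seq_length, batch, n_time_series)  # time-major, as nn.TransformerEncoder expects
h = encoder(dense_shape(x))                        # (seq_length, batch, d_model)
y = output_dim_layer(h)                            # (seq_length, batch, output_dim)
y = out_length_lay(y.permute(1, 2, 0))             # (batch, output_dim, output_seq_length)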