def __init__(self, vp_value_count, output_shape, vgg_weights_path='', i3d_weights_path='', name='Full Network'):
    """
    Initializes the Full Network.
    :param vp_value_count: (int) The number of values that identify the viewpoint.
    :param output_shape: (5-tuple) The desired output shape for generated videos. Must match video input shape.
                         Legal values: (bsz, 3, 8, 112, 112) and (bsz, 3, 16, 112, 112)
    :param name: (str, optional) The name of the network (default 'Full Network').
    Raises:
        ValueError: if 'vp_value_count' is not a legal value count
        ValueError: if 'output_shape' does not contain a legal number of frames.
    """
    if vp_value_count not in self.VALID_VP_VALUE_COUNTS:
        raise ValueError('Invalid number of vp values: %d' % vp_value_count)
    if output_shape[2] not in self.VALID_FRAME_COUNTS:
        raise ValueError('Invalid number of frames in desired output: %d' % output_shape[2])

    super(FullNetwork, self).__init__()

    self.net_name = name
    self.vp_value_count = vp_value_count
    self.output_shape = output_shape
    self.out_frames = output_shape[2]
    self.rep_channels = 256
    self.rep_frames = 4
    self.rep_size = 14

    self.vgg = vgg16(pretrained=True, weights_path=vgg_weights_path)
    self.i3d = InceptionI3d(final_endpoint='Mixed_5c', in_frames=self.out_frames,
                            pretrained=True, weights_path=i3d_weights_path)
    self.exp = Expander(vp_value_count=self.vp_value_count, out_frames=self.rep_frames, out_size=self.rep_size)
    self.trans = Transformer(in_channels=self.rep_channels + self.vp_value_count, out_channels=self.rep_channels)
    self.gen = Generator(in_channels=[self.rep_channels, self.rep_channels], out_frames=self.out_frames)
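# Usage sketch (illustrative, not from the source). It assumes the class-level
# VALID_VP_VALUE_COUNTS / VALID_FRAME_COUNTS admit the values below, per the docstring,
# and the weight paths are hypothetical placeholders:
#
#     net = FullNetwork(vp_value_count=1, output_shape=(2, 3, 16, 112, 112),
#                       vgg_weights_path='weights/vgg16.pt',      # hypothetical path
#                       i3d_weights_path='weights/i3d_rgb.pt')    # hypothetical path
#     FullNetwork(vp_value_count=1, output_shape=(2, 3, 7, 112, 112))  # raises ValueError (7 frames)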
def __init__(self, num_classes=92, num_queries=100, backbone=None, pos_encoder=None,
             transformer=None, num_encoder_layers=6, num_decoder_layers=6,
             return_intermediate_dec=True, **kwargs):
    super().__init__(**kwargs)
    self.num_queries = num_queries

    # fall back to the default ResNet-50 backbone when none is supplied, mirroring
    # the `transformer or ...` / `pos_encoder or ...` pattern below (the original
    # accepted `backbone` but ignored it)
    self.backbone = backbone or ResNet50Backbone(name='backbone')
    self.transformer = transformer or Transformer(
        num_encoder_layers=num_encoder_layers,
        num_decoder_layers=num_decoder_layers,
        return_intermediate_dec=return_intermediate_dec,
        name='transformer'
    )
    self.model_dim = self.transformer.model_dim

    self.pos_encoder = pos_encoder or PositionEmbeddingSine(
        num_pos_features=self.model_dim // 2, normalize=True)

    self.input_proj = tf.keras.layers.Conv2D(self.model_dim, kernel_size=1, name='input_proj')

    self.query_embed = FixedEmbedding((num_queries, self.model_dim), name='query_embed')

    self.class_embed = Linear(num_classes, name='class_embed')

    self.bbox_embed_linear1 = Linear(self.model_dim, name='bbox_embed_0')
    self.bbox_embed_linear2 = Linear(self.model_dim, name='bbox_embed_1')
    self.bbox_embed_linear3 = Linear(4, name='bbox_embed_2')
    self.activation = tf.keras.layers.ReLU()
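# Usage sketch (illustrative; the class name `DETRModel` is hypothetical, since this
# excerpt only shows __init__ — substitute the real tf.keras.Model subclass):
#
#     model = DETRModel(num_classes=92, num_queries=100)
#     images = tf.random.uniform((1, 512, 512, 3))   # NHWC batch; shape is assumed
#     outputs = model(images)                        # forward pass via the model's call()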
def __init__(self, vp_value_count, stdev, output_shape, pretrained=False,
             vgg_weights_path='', i3d_weights_path='', name='Full Network'):
    """
    Initializes the Full Network.
    :param vp_value_count: (int) The number of values that identify the viewpoint.
    :param stdev: (float) The standard deviation used by the keypoint predictor.
    :param output_shape: (5-tuple) The desired output shape for generated videos. Must match video input shape.
                         Legal values: (bsz, 3, 8, 112, 112) and (bsz, 3, 16, 112, 112)
    :param pretrained: (bool, optional) Whether to load pretrained VGG-16 and I3D weights (default False).
    :param vgg_weights_path: (str, optional) Path to the pretrained VGG-16 weights (default '').
    :param i3d_weights_path: (str, optional) Path to the pretrained I3D weights (default '').
    :param name: (str, optional) The name of the network (default 'Full Network').
    Raises:
        ValueError: if 'vp_value_count' is not a legal value count
        ValueError: if 'output_shape' does not contain a legal number of frames.
    """
    if vp_value_count not in self.VALID_VP_VALUE_COUNTS:
        raise ValueError('Invalid number of vp values: %d' % vp_value_count)
    if output_shape[2] not in self.VALID_FRAME_COUNTS:
        raise ValueError('Invalid number of frames in desired output: %d' % output_shape[2])

    super(FullNetwork, self).__init__()

    self.net_name = name
    self.vp_value_count = vp_value_count
    self.stdev = stdev
    self.output_shape = output_shape
    self.out_frames = output_shape[2]

    # specs of various features
    self.app_feat = 128
    self.rep_feat = 128
    self.rep_frames = 4
    self.rep_size = 14
    self.nkp = 32

    self.vgg = vgg16(pretrained=pretrained, weights_path=vgg_weights_path)
    self.i3d = InceptionI3d(final_endpoint='Mixed_5c', in_frames=self.out_frames,
                            pretrained=pretrained, weights_path=i3d_weights_path)
    self.exp = Expander(vp_value_count=self.vp_value_count)

    # convs to make all appearance encodings have the same number of channels,
    # so they can be used in the same convGRU
    self.app_conv128 = nn.Conv2d(in_channels=128, out_channels=self.app_feat, kernel_size=(3, 3),
                                 stride=(1, 1), padding=(1, 1))
    self.app_conv256a = nn.Conv2d(in_channels=256, out_channels=self.app_feat, kernel_size=(3, 3),
                                  stride=(1, 1), padding=(1, 1))
    self.app_conv256b = nn.Conv2d(in_channels=256, out_channels=self.app_feat, kernel_size=(3, 3),
                                  stride=(1, 1), padding=(1, 1))
    # a plain list works here: each conv is already registered as a submodule
    # through the attribute assignments above
    self.app_convs = [
        nn.Sequential(self.app_conv128, nn.ReLU(inplace=True)),
        nn.Sequential(self.app_conv256a, nn.ReLU(inplace=True)),
        nn.Sequential(self.app_conv256b, nn.ReLU(inplace=True))
    ]

    # convs to make all motion features have the same number of channels,
    # so they can be used in the same trans net
    self.rep_conv64 = nn.Conv3d(in_channels=64, out_channels=self.rep_feat, kernel_size=(3, 3, 3),
                                stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_conv192 = nn.Conv3d(in_channels=192, out_channels=self.rep_feat, kernel_size=(3, 3, 3),
                                 stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_conv256 = nn.Conv3d(in_channels=256, out_channels=self.rep_feat, kernel_size=(3, 3, 3),
                                 stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_convs = [
        nn.Sequential(self.rep_conv64, nn.ReLU(inplace=True)),
        nn.Sequential(self.rep_conv192, nn.ReLU(inplace=True)),
        nn.Sequential(self.rep_conv256, nn.ReLU(inplace=True))
    ]

    self.trans = Transformer(in_channels=self.rep_feat + self.vp_value_count, out_channels=self.rep_feat)
    self.kpp = KPPredictor(in_channels=self.rep_feat, nkp=self.nkp, stdev=self.stdev)
    self.vpp = VPPredictor(in_channels=256)
    self.gru = ConvGRU(input_dim=self.rep_feat, hidden_dim=[self.app_feat], kernel_size=(7, 7),
                       num_layers=1, batch_first=True, bias=False, return_all_layers=False)
    self.gen = Generator(in_channels=[self.app_feat, self.nkp], out_frames=self.out_frames)
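# Usage sketch (illustrative, not from the source); the weight paths and the
# stdev value of 0.1 are assumptions, not values taken from the repository:
#
#     net = FullNetwork(vp_value_count=1, stdev=0.1, output_shape=(2, 3, 16, 112, 112),
#                       pretrained=True,
#                       vgg_weights_path='weights/vgg16.pt',     # hypothetical path
#                       i3d_weights_path='weights/i3d_rgb.pt')   # hypothetical path
#     # stdev is forwarded to the keypoint predictor (KPPredictor)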
def __init__(self, vp_value_count, output_shape, vgg_weights_path='', i3d_weights_path='', name='Full Network'):
    """
    Initializes the Full Network.
    :param vp_value_count: (int) The number of values that identify the viewpoint.
    :param output_shape: (5-tuple) The desired output shape for generated videos. Must match video input shape.
                         Legal values: (bsz, 3, 8, 112, 112) and (bsz, 3, 16, 112, 112)
    :param name: (str, optional) The name of the network (default 'Full Network').
    Raises:
        ValueError: if 'vp_value_count' is not a legal value count
        ValueError: if 'output_shape' does not contain a legal number of frames.
    """
    if vp_value_count not in self.VALID_VP_VALUE_COUNTS:
        raise ValueError('Invalid number of vp values: %d' % vp_value_count)
    if output_shape[2] not in self.VALID_FRAME_COUNTS:
        raise ValueError('Invalid number of frames in desired output: %d' % output_shape[2])

    super(FullNetwork, self).__init__()

    self.net_name = name
    self.vp_value_count = vp_value_count
    self.output_shape = output_shape
    self.out_frames = output_shape[2]
    self.rep_channels = 256
    self.rep_frames = 4
    self.rep_size = 14

    self.vgg = vgg16(pretrained=True, weights_path=vgg_weights_path)
    self.i3d = InceptionI3d(final_endpoint='Mixed_5c', in_frames=self.out_frames,
                            pretrained=True, weights_path=i3d_weights_path)
    self.exp = Expander(vp_value_count=self.vp_value_count)

    # convs to make all appearance encodings have the same number of channels,
    # so they can be used in the same convLSTM
    self.app_conv128 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3, 3),
                                 stride=(1, 1), padding=(1, 1))
    self.app_conv256a = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3),
                                  stride=(1, 1), padding=(1, 1))
    # note: despite the name, this branch consumes the 512-channel appearance encoding
    self.app_conv256b = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=(3, 3),
                                  stride=(1, 1), padding=(1, 1))
    self.app_convs = [
        self.app_conv128,
        self.app_conv256a,
        self.app_conv256b
    ]

    # convs for the initial hidden and current states of the convLSTM
    self.hconv = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=(3, 3),
                           stride=(1, 1), padding=(1, 1))
    self.cconv = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=(3, 3),
                           stride=(1, 1), padding=(1, 1))

    # convs to make all motion features have the same number of channels,
    # so they can be used in the same Trans Net
    self.rep_conv64 = nn.Conv3d(in_channels=64, out_channels=256, kernel_size=(3, 3, 3),
                                stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_conv192 = nn.Conv3d(in_channels=192, out_channels=256, kernel_size=(3, 3, 3),
                                 stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_conv256 = nn.Conv3d(in_channels=256, out_channels=256, kernel_size=(3, 3, 3),
                                 stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_convs = {
        64: self.rep_conv64,
        192: self.rep_conv192,
        256: self.rep_conv256
    }

    self.trans = Transformer(in_channels=256 + self.vp_value_count, out_channels=128)
    self.conv_lstm = ConvLSTM(input_dim=128, hidden_dim=[128], kernel_size=(3, 3), num_layers=1,
                              batch_first=True, bias=False, return_all_layers=False)
    self.gen = Generator(in_channels=[128], out_frames=self.out_frames)
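# Usage sketch (illustrative, not from the source). The channel-keyed dict lets the
# forward pass pick the 3D conv matching each intermediate I3D feature:
#
#     net = FullNetwork(vp_value_count=1, output_shape=(2, 3, 8, 112, 112))
#     conv = net.rep_convs[192]   # 3D conv for a 192-channel motion feature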
def __init__(self, vp_value_count, output_shape, vgg_weights_path='', i3d_weights_path='', name='Full Network'):
    """
    Initializes the Full Network.
    :param vp_value_count: (int) The number of values that identify the viewpoint.
    :param output_shape: (5-tuple) The desired output shape for generated videos. Must match video input shape.
                         Legal values: (bsz, 3, 8, 112, 112) and (bsz, 3, 16, 112, 112)
    :param name: (str, optional) The name of the network (default 'Full Network').
    Raises:
        ValueError: if 'vp_value_count' is not a legal value count
        ValueError: if 'output_shape' does not contain a legal number of frames.
    """
    if vp_value_count not in self.VALID_VP_VALUE_COUNTS:
        raise ValueError('Invalid number of vp values: %d' % vp_value_count)
    if output_shape[2] not in self.VALID_FRAME_COUNTS:
        raise ValueError('Invalid number of frames in desired output: %d' % output_shape[2])

    super(FullNetwork, self).__init__()

    # params
    self.net_name = name
    self.vp_value_count = vp_value_count
    self.output_shape = output_shape
    self.out_frames = output_shape[2]
    self.rep_feat = 128
    self.app_feat = 256

    # networks
    self.vgg = vgg16(pretrained=True, weights_path=vgg_weights_path)
    self.i3d = InceptionI3d(final_endpoint='Mixed_5c', in_frames=self.out_frames,
                            pretrained=True, weights_path=i3d_weights_path)
    self.exp = Expander(vp_value_count=self.vp_value_count)
    self.trans = Transformer(in_channels=self.rep_feat + self.vp_value_count, out_channels=self.rep_feat)
    self.gen = Generator(in_channels=[self.app_feat, self.rep_feat], out_frames=self.out_frames)
    # nn.ModuleDict (keyed by the feature map size as a string) registers the ConvLSTMs
    # as submodules; a plain dict would hide their parameters from the optimizer and .to()
    self.conv_lstms = nn.ModuleDict({
        '56': ConvLSTM(input_dim=self.rep_feat, hidden_dim=[self.app_feat], kernel_size=(3, 3),
                       num_layers=1, in_shape=(56, 56), batch_first=True, bias=False,
                       return_all_layers=False),
        '28': ConvLSTM(input_dim=self.rep_feat, hidden_dim=[self.app_feat], kernel_size=(3, 3),
                       num_layers=1, in_shape=(28, 28), batch_first=True, bias=False,
                       return_all_layers=False),
        '14': ConvLSTM(input_dim=self.rep_feat, hidden_dim=[self.app_feat], kernel_size=(3, 3),
                       num_layers=1, in_shape=(14, 14), batch_first=True, bias=False,
                       return_all_layers=False)
    })

    # convs
    self.app_conv128 = nn.Conv2d(in_channels=128, out_channels=self.app_feat, kernel_size=(3, 3),
                                 stride=(1, 1), padding=(1, 1))
    self.app_conv256 = nn.Conv2d(in_channels=256, out_channels=self.app_feat, kernel_size=(3, 3),
                                 stride=(1, 1), padding=(1, 1))
    self.app_conv512 = nn.Conv2d(in_channels=512, out_channels=self.app_feat, kernel_size=(3, 3),
                                 stride=(1, 1), padding=(1, 1))
    # plain dicts are fine for these lookups: each conv is already registered as a
    # submodule through the attribute assignments above
    self.app_convs = {
        128: self.app_conv128,
        256: self.app_conv256,
        512: self.app_conv512
    }

    self.hconv = nn.Conv2d(in_channels=self.app_feat, out_channels=128, kernel_size=(3, 3),
                           stride=(1, 1), padding=(1, 1))
    self.cconv = nn.Conv2d(in_channels=self.app_feat, out_channels=128, kernel_size=(3, 3),
                           stride=(1, 1), padding=(1, 1))

    self.rep_conv64 = nn.Conv3d(in_channels=64, out_channels=self.rep_feat, kernel_size=(3, 3, 3),
                                stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_conv192 = nn.Conv3d(in_channels=192, out_channels=self.rep_feat, kernel_size=(3, 3, 3),
                                 stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_conv256 = nn.Conv3d(in_channels=256, out_channels=self.rep_feat, kernel_size=(3, 3, 3),
                                 stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_convs = {
        64: self.rep_conv64,
        192: self.rep_conv192,
        256: self.rep_conv256
    }
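# Usage sketch (illustrative, not from the source). With the nn.ModuleDict above,
# ConvLSTMs are looked up by the feature map's spatial size as a string key:
#
#     net = FullNetwork(vp_value_count=1, output_shape=(2, 3, 16, 112, 112))
#     lstm = net.conv_lstms[str(28)]   # ConvLSTM sized for 28x28 feature maps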