def __init__(self, **kwargs):
    """Build the embedding layer and encoder from ``self.local_parameters``.

    Required keys: "vocab_size", "d_model", "max_seq_length", "num_layers",
    "num_attn_heads" and "d_inner". All remaining keys are optional and fall
    back to the defaults spelled out below.
    """
    TrainableNM.__init__(self, **kwargs)
    cfg = self.local_parameters

    # Keyword arguments for the embedding layer.
    embedding_kwargs = dict(
        vocab_size=cfg["vocab_size"],
        hidden_size=cfg["d_model"],
        max_sequence_length=cfg["max_seq_length"],
        embedding_dropout=cfg.get("embedding_dropout", 0),
        learn_positional_encodings=cfg.get(
            "learn_positional_encodings", False),
    )
    # Keyword arguments for the encoder stack.
    encoder_kwargs = dict(
        num_layers=cfg["num_layers"],
        hidden_size=cfg["d_model"],
        mask_future=cfg.get("mask_future", False),
        num_attention_heads=cfg["num_attn_heads"],
        inner_size=cfg["d_inner"],
        ffn_dropout=cfg.get("ffn_dropout", 0),
        hidden_act=cfg.get("hidden_act", "relu"),
        attn_score_dropout=cfg.get("attn_score_dropout", 0),
        attn_layer_dropout=cfg.get("attn_layer_dropout", 0),
    )

    self.embedding_layer = TransformerEmbedding(**embedding_kwargs)
    self.encoder = TransformerEncoder(**encoder_kwargs)

    # Initialization range is 1 / sqrt(d_model).
    init_range = 1 / math.sqrt(cfg["d_model"])

    def _init_weights(module):
        return transformer_weights_init(module, init_range)

    self.apply(_init_weights)
    self.to(self._device)
def __init__(self, decoder, log_softmax, max_seq_length, pad_token,
             bos_token, eos_token, batch_size=1, beam_size=4,
             max_delta_length=50, length_penalty=0, **kwargs):
    """Create a ``BeamSearchSequenceGenerator`` and store it as ``self.generator``.

    Args:
        decoder: object exposing ``embedding_layer`` and ``decoder`` attributes.
        log_softmax: passed through to the generator as its third argument.
        max_seq_length: forwarded as ``max_sequence_length``.
        pad_token: forwarded as ``pad``.
        bos_token: forwarded as ``bos``.
        eos_token: forwarded as ``eos``.
        batch_size: forwarded as ``batch_size``.
        beam_size: forwarded as ``beam_size``.
        max_delta_length: forwarded as ``max_delta_length``.
        length_penalty: forwarded as ``len_pen``.
        **kwargs: forwarded to ``TrainableNM.__init__``.
    """
    TrainableNM.__init__(self, **kwargs)

    search_options = dict(
        max_sequence_length=max_seq_length,
        max_delta_length=max_delta_length,
        pad=pad_token,
        bos=bos_token,
        eos=eos_token,
        batch_size=batch_size,
        beam_size=beam_size,
        len_pen=length_penalty,
    )
    self.generator = BeamSearchSequenceGenerator(
        decoder.embedding_layer, decoder.decoder, log_softmax,
        **search_options)
def __init__(self, *, feat_in, num_classes, init_mode="xavier_uniform",
             return_logits=True, pooling_type='avg', **kwargs):
    """Set up a 1-D adaptive pooling layer followed by one linear layer.

    Args:
        feat_in: input size of the linear layer.
        num_classes: output size of the linear layer.
        init_mode: mode passed to ``init_weights`` for every submodule.
        return_logits: stored as ``self._return_logits``.
        pooling_type: ``'avg'`` or ``'max'``.
        **kwargs: forwarded to ``TrainableNM.__init__``.

    Raises:
        ValueError: if ``pooling_type`` is neither ``'avg'`` nor ``'max'``.
    """
    TrainableNM.__init__(self, **kwargs)

    self._feat_in = feat_in
    self._return_logits = return_logits
    self._num_classes = num_classes

    # Guard clause: reject unknown pooling types up front.
    if pooling_type not in ('avg', 'max'):
        raise ValueError(
            'Pooling type chosen is not valid. Must be either `avg` or `max`'
        )
    self.pooling = (
        nn.AdaptiveAvgPool1d(1) if pooling_type == 'avg'
        else nn.AdaptiveMaxPool1d(1)
    )

    linear = nn.Linear(self._feat_in, self._num_classes, bias=True)
    self.decoder_layers = nn.Sequential(linear)

    def _init(module):
        return init_weights(module, mode=init_mode)

    self.apply(_init)
    self.to(self._device)
def __init__(self, feat_in, num_classes, emb_sizes=[1024, 1024],
             pool_mode='xvector', init_mode="xavier_uniform"):
    """Set up stats pooling, two affine layers and a final linear layer.

    Args:
        feat_in: per-frame feature size used to derive the pooled size.
        num_classes: output size of the final linear layer.
        emb_sizes: sizes of the two intermediate embeddings; when only one
            value is given the second size is 0. The default list is only
            read, never mutated.
        pool_mode: ``'gram'`` and ``'superVector'`` enable gram pooling,
            ``'superVector'`` additionally enables the super vector; any
            other value disables both.
        init_mode: mode passed to ``init_weights`` for every submodule.
    """
    TrainableNM.__init__(self)

    use_super_vector = pool_mode == 'superVector'
    use_gram = use_super_vector or pool_mode == 'gram'

    # Pooled feature size: feat_in^2 with gram pooling, otherwise 2 * feat_in;
    # the super vector adds another 2 * feat_in on top.
    pooled_size = 0
    pooled_size += feat_in ** 2 if use_gram else 2 * feat_in
    if use_super_vector:
        pooled_size += 2 * feat_in
    self._feat_in = pooled_size

    # Speaker-vector embedding sizes.
    self._midEmbd1 = int(emb_sizes[0])
    self._midEmbd2 = 0 if len(emb_sizes) <= 1 else int(emb_sizes[1])
    self._num_classes = num_classes

    self._pooling = StatsPoolLayer(gram=use_gram,
                                   super_vector=use_super_vector)
    self.mid1 = self.affineLayer(self._feat_in, self._midEmbd1,
                                 learn_mean=False)
    self.mid2 = self.affineLayer(self._midEmbd1, self._midEmbd2,
                                 learn_mean=False)
    self.final = nn.Linear(self._midEmbd2, self._num_classes)

    def _init(module):
        return init_weights(module, mode=init_mode)

    self.apply(_init)
    self.to(self._device)
def __init__(self, *, pretrained_model_name=None, config_filename=None,
             vocab_size=None, hidden_size=768, num_hidden_layers=12,
             num_attention_heads=12, intermediate_size=3072,
             hidden_act="gelu", max_position_embeddings=512,
             random_init=False, **kwargs):
    """Create a ``BertModel`` and register it as the ``bert`` submodule.

    Exactly one of ``pretrained_model_name``, ``config_filename`` and
    ``vocab_size`` must be given; it selects how the model is built.

    Args:
        pretrained_model_name: name passed to ``BertModel.from_pretrained``.
        config_filename: JSON file passed to ``BertConfig.from_json_file``.
        vocab_size: vocabulary size for a ``BertConfig`` built from the
            remaining size arguments.
        hidden_size: used only together with ``vocab_size``.
        num_hidden_layers: used only together with ``vocab_size``.
        num_attention_heads: used only together with ``vocab_size``.
        intermediate_size: used only together with ``vocab_size``.
        hidden_act: used only together with ``vocab_size``.
        max_position_embeddings: used only together with ``vocab_size``.
        random_init: when true, re-initialize all submodules with
            ``transformer_weights_init(module, xavier=False)``.
        **kwargs: forwarded to ``TrainableNM.__init__``.

    Raises:
        ValueError: if zero or more than one model source is given.
    """
    TrainableNM.__init__(self, **kwargs)

    # Check that only one of pretrained_model_name, config_filename, and
    # vocab_size was passed in.
    sources = (pretrained_model_name, config_filename, vocab_size)
    if sum(source is not None for source in sources) != 1:
        raise ValueError(
            "Only one of pretrained_model_name, vocab_size, "
            + "or config_filename should be passed into the "
            + "BERT constructor.")

    if vocab_size is not None:
        config = BertConfig(
            vocab_size_or_config_json_file=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            max_position_embeddings=max_position_embeddings)
        model = BertModel(config)
    elif pretrained_model_name is not None:
        model = BertModel.from_pretrained(pretrained_model_name)
    elif config_filename is not None:
        config = BertConfig.from_json_file(config_filename)
        model = BertModel(config)
    else:
        # Defensive only: the exactly-one check above makes this branch
        # unreachable. The message previously lacked the space before "be".
        raise ValueError(
            "Either pretrained_model_name or vocab_size must"
            + " be passed into the BERT constructor")

    model.to(self._device)
    self.add_module("bert", model)
    self.config = model.config

    if random_init:
        self.apply(
            lambda module: transformer_weights_init(module, xavier=False))
def __init__(self, **kwargs):
    """Create a linear layer with 2 outputs on top of a "d_model"-sized input.

    The input size is read from ``self.local_parameters["d_model"]``.
    """
    TrainableNM.__init__(self, **kwargs)
    self.hidden_size = self.local_parameters["d_model"]

    output_layer = nn.Linear(self.hidden_size, 2)
    self.qa_outputs = output_layer
    # Initialize and place only this layer, not the whole module.
    output_layer.apply(transformer_weights_init)
    output_layer.to(self._device)
def __init__(self, *, vocab_size, d_model, **kwargs):
    """Create a ``TransformerLogSoftmax`` and store it as ``self.log_softmax``.

    Args:
        vocab_size: forwarded as ``vocab_size``.
        d_model: forwarded as ``hidden_size``.
        **kwargs: forwarded to ``TrainableNM.__init__``.
    """
    TrainableNM.__init__(self, **kwargs)

    head = TransformerLogSoftmax(vocab_size=vocab_size, hidden_size=d_model)
    self.log_softmax = head
    head.apply(transformer_weights_init)
    head.to(self._device)
def __init__(self, *, d_model, num_classes, **kwargs):
    """Create a ``ClassificationLogSoftmax`` and store it as ``self.log_softmax``.

    Args:
        d_model: forwarded as ``hidden_size``.
        num_classes: forwarded as ``num_classes``.
        **kwargs: forwarded to ``TrainableNM.__init__``.
    """
    TrainableNM.__init__(self, **kwargs)

    head = ClassificationLogSoftmax(hidden_size=d_model,
                                    num_classes=num_classes)
    self.log_softmax = head
    head.apply(transformer_weights_init)
    head.to(self._device)
def __init__(self, *, pretrained_model_name=None, config_filename=None,
             vocab_size=None, hidden_size=768, num_hidden_layers=12,
             num_attention_heads=12, intermediate_size=3072,
             hidden_act="gelu", max_position_embeddings=512, **kwargs):
    """Create a ``BertModel``, register it as ``bert`` and export its config.

    Exactly one of ``pretrained_model_name``, ``config_filename`` and
    ``vocab_size`` must be given. After the model is built, every entry of
    its config dictionary is copied into ``self._local_parameters``.

    Raises:
        ValueError: if zero or more than one model source is given.
    """
    TrainableNM.__init__(self, **kwargs)

    # Count how many of the three mutually exclusive sources were supplied.
    supplied = [
        source
        for source in (pretrained_model_name, config_filename, vocab_size)
        if source is not None
    ]
    if len(supplied) != 1:
        raise ValueError(
            "Only one of pretrained_model_name, vocab_size, "
            "or config_filename should be passed into the "
            "BERT constructor.")

    if vocab_size is not None:
        # Both spellings of the vocabulary-size argument are passed, exactly
        # as before.
        bert_config = BertConfig(
            vocab_size_or_config_json_file=vocab_size,
            vocab_size=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            max_position_embeddings=max_position_embeddings,
        )
        bert = BertModel(bert_config)
    elif pretrained_model_name is not None:
        bert = BertModel.from_pretrained(pretrained_model_name)
    elif config_filename is not None:
        bert = BertModel(BertConfig.from_json_file(config_filename))
    else:
        raise ValueError(
            "Either pretrained_model_name or vocab_size must"
            " be passed into the BERT constructor")

    bert.to(self._device)
    self.add_module("bert", bert)
    self.config = bert.config

    # Expose the resolved configuration through the local parameters.
    exported = self.config.to_dict()
    for param_name in exported:
        self._local_parameters[param_name] = exported[param_name]
def __init__(
    self, *, jasper, activation, feat_in, normalization_mode="batch",
    residual_mode="add", norm_groups=-1, conv_mask=True, frame_splicing=1,
    init_mode='xavier_uniform', **kwargs
):
    """Stack one ``JasperBlock`` per entry of ``jasper`` into ``self.encoder``.

    Args:
        jasper: iterable of per-block config mappings. Each must provide
            'filters', 'repeat', 'kernel', 'stride', 'dilation', 'dropout'
            and 'residual'; 'residual_dense', 'groups', 'separable' and
            'heads' are optional.
        activation: key into ``jasper_activations``; the value found is
            called with no arguments to create the activation.
        feat_in: input feature size before frame splicing.
        normalization_mode: forwarded as ``normalization``.
        residual_mode: forwarded as ``residual_mode``.
        norm_groups: forwarded as ``norm_groups``.
        conv_mask: forwarded as ``conv_mask``.
        frame_splicing: multiplier applied to ``feat_in``.
        init_mode: mode passed to ``init_weights`` for every submodule.
        **kwargs: forwarded to ``TrainableNM.__init__``.
    """
    TrainableNM.__init__(self, **kwargs)

    act = jasper_activations[activation]()
    in_channels = feat_in * frame_splicing

    # Running list of input sizes of dense-residual blocks. Dense blocks
    # receive this very list object (not a copy), as before.
    dense_panes = []
    blocks = []
    self.dense_residual = False

    for block_cfg in jasper:
        if block_cfg.get('residual_dense', False):
            dense_panes.append(in_channels)
            block_panes = dense_panes
            self.dense_residual = True
        else:
            block_panes = []

        num_groups = block_cfg.get('groups', 1)
        is_separable = block_cfg.get('separable', False)
        num_heads = block_cfg.get('heads', -1)

        block = JasperBlock(
            in_channels,
            block_cfg['filters'],
            repeat=block_cfg['repeat'],
            kernel_size=block_cfg['kernel'],
            stride=block_cfg['stride'],
            dilation=block_cfg['dilation'],
            dropout=block_cfg['dropout'],
            residual=block_cfg['residual'],
            groups=num_groups,
            separable=is_separable,
            heads=num_heads,
            residual_mode=residual_mode,
            normalization=normalization_mode,
            norm_groups=norm_groups,
            activation=act,
            residual_panes=block_panes,
            conv_mask=conv_mask,
        )
        blocks.append(block)
        # The next block consumes this block's filters.
        in_channels = block_cfg['filters']

    self.encoder = nn.Sequential(*blocks)

    def _init(module):
        return init_weights(module, mode=init_mode)

    self.apply(_init)
    self.to(self._device)
def __init__(self, *, voc_size, hidden_size, dropout=0.0, **kwargs):
    """Create an embedding table and, for non-zero ``dropout``, a dropout layer.

    Args:
        voc_size: number of embeddings.
        hidden_size: size of each embedding vector.
        dropout: dropout probability; ``self.embedding_dropout`` is only
            created when this differs from 0.0.
        **kwargs: forwarded to ``TrainableNM.__init__``.
    """
    TrainableNM.__init__(self, **kwargs)

    self.voc_size, self.hidden_size, self.dropout = (
        voc_size, hidden_size, dropout)

    self.embedding = nn.Embedding(voc_size, hidden_size)
    has_dropout = self.dropout != 0.0
    if has_dropout:
        self.embedding_dropout = nn.Dropout(self.dropout)
def __init__(self, *, from_dim, to_dim, dropout=0.0, **kwargs):
    """Create a bias-free linear projection and an optional dropout layer.

    Args:
        from_dim: input size of the projection.
        to_dim: output size of the projection.
        dropout: dropout probability; ``self.embedding_dropout`` is only
            created when this differs from 0.0.
        **kwargs: forwarded to ``TrainableNM.__init__``.
    """
    TrainableNM.__init__(self, **kwargs)

    self.from_dim, self.to_dim, self.dropout = from_dim, to_dim, dropout

    self.projection = nn.Linear(from_dim, to_dim, bias=False)
    has_dropout = self.dropout != 0.0
    if has_dropout:
        self.embedding_dropout = nn.Dropout(self.dropout)
def __init__(self, hidden_size, num_classes, dropout, **kwargs):
    """Create a dropout layer, a square dense layer and a classifier layer.

    Args:
        hidden_size: input size of both linear layers and output size of
            the dense layer.
        num_classes: output size of the classifier layer.
        dropout: probability for the dropout layer.
        **kwargs: forwarded to ``TrainableNM.__init__``.
    """
    TrainableNM.__init__(self, **kwargs)

    self.hidden_size = hidden_size
    self.num_classes = num_classes

    self.dropout = nn.Dropout(dropout)
    self.dense = nn.Linear(hidden_size, hidden_size)
    self.classifier = nn.Linear(hidden_size, num_classes)

    def _init(module):
        return transformer_weights_init(module, xavier=False)

    self.apply(_init)
    self.to(self._device)
def __init__(self, *, feat_in, num_classes, init_mode="xavier_uniform",
             **kwargs):
    """Create a kernel-size-1 ``Conv1d`` decoder with one extra output class.

    Args:
        feat_in: number of input channels.
        num_classes: number of classes, excluding the blank character.
        init_mode: mode passed to ``init_weights`` for every submodule.
        **kwargs: forwarded to ``TrainableNM.__init__``.
    """
    TrainableNM.__init__(self, **kwargs)

    self._feat_in = feat_in
    # One extra output for the blank character.
    self._num_classes = 1 + num_classes

    pointwise_conv = nn.Conv1d(
        self._feat_in, self._num_classes, kernel_size=1, bias=True)
    self.decoder_layers = nn.Sequential(pointwise_conv)

    def _init(module):
        return init_weights(module, mode=init_mode)

    self.apply(_init)
    self.to(self._device)
def __init__(self, **kwargs):
    """Create a dropout layer and a linear classifier from local parameters.

    Reads "d_model", "num_labels" and "dropout" from
    ``self.local_parameters``; all three are required.
    """
    TrainableNM.__init__(self, **kwargs)
    cfg = self.local_parameters

    self.hidden_size = cfg["d_model"]
    self.num_labels = cfg["num_labels"]
    self.dropout = nn.Dropout(cfg["dropout"])
    self.classifier = nn.Linear(self.hidden_size, self.num_labels)

    def _init(module):
        return transformer_weights_init(module, xavier=False)

    self.apply(_init)
    self.to(self._device)
def __init__(self, decoder, log_softmax, **kwargs):
    """Create a ``GreedySequenceGenerator`` configured from local parameters.

    Reads "max_seq_length", "pad_token", "bos_token" and "eos_token"
    (required) plus "batch_size" (default 1) from ``self.local_parameters``.

    Args:
        decoder: passed to the generator as its first argument.
        log_softmax: passed to the generator as its second argument.
        **kwargs: forwarded to ``TrainableNM.__init__``.
    """
    TrainableNM.__init__(self, **kwargs)
    cfg = self.local_parameters

    options = dict(
        max_sequence_length=cfg["max_seq_length"],
        pad=cfg["pad_token"],
        bos=cfg["bos_token"],
        eos=cfg["eos_token"],
        batch_size=cfg.get("batch_size", 1),
    )
    self.generator = GreedySequenceGenerator(decoder, log_softmax, **options)
def __init__(self, *, dim, **kwargs):
    """Create a single linear layer mapping ``dim`` inputs to one output.

    Args:
        dim: input size of the linear layer.
        **kwargs: forwarded to ``TrainableNM.__init__``.
    """
    # Neural Modules API part: call the base constructor first.
    TrainableNM.__init__(self, **kwargs)

    # Everything below is plain PyTorch code.
    self._dim = dim
    self.fc1 = nn.Linear(self._dim, 1)
    t.nn.init.xavier_uniform_(self.fc1.weight)

    # Pick the device from the module placement.
    on_gpu = self.placement == DeviceType.GPU
    device_name = "cuda" if on_gpu else "cpu"
    self._device = t.device(device_name)
    self.to(self._device)
def __init__(
    self, *, sample_rate=16000, window_size=0.02, window_stride=0.01,
    window="hann", normalize="per_feature", n_fft=None, preemph=0.97,
    features=64, lowfreq=0, highfreq=None, feat_type="logfbank",
    dither=1e-5, pad_to=16, frame_splicing=1, stft_conv=False, **kwargs
):
    """Create a ``FilterbankFeatures`` featurizer and move it to the device.

    All signal-processing arguments are forwarded to ``FilterbankFeatures``
    unchanged, except ``features``, which is passed as ``nfilt``.

    Args:
        feat_type: must contain the substring "fbank".
        **kwargs: forwarded to ``TrainableNM.__init__``.

    Raises:
        NotImplementedError: if "fbank" does not occur in ``feat_type``.
            This is checked before the base constructor runs.
    """
    supported = "fbank" in feat_type
    if not supported:
        raise NotImplementedError("AudioPreprocessing currently only "
                                  "accepts 'fbank' or 'logfbank' as "
                                  "feat_type")

    TrainableNM.__init__(self, **kwargs)

    featurizer_kwargs = dict(
        sample_rate=sample_rate,
        window_size=window_size,
        window_stride=window_stride,
        window=window,
        normalize=normalize,
        n_fft=n_fft,
        preemph=preemph,
        nfilt=features,
        lowfreq=lowfreq,
        highfreq=highfreq,
        dither=dither,
        pad_to=pad_to,
        frame_splicing=frame_splicing,
        stft_conv=stft_conv,
        logger=self._logger,
    )
    self.featurizer = FilterbankFeatures(**featurizer_kwargs)
    self.featurizer.to(self._device)

    # True only when the optimization level is mxprO1.
    self.disable_casts = self._opt_level == Optimization.mxprO1
def __init__(self, decoder, log_softmax, **kwargs):
    """Create a ``BeamSearchSequenceGenerator`` configured from local parameters.

    Required local parameters: "max_seq_length", "pad_token", "bos_token",
    "eos_token". Optional: "max_delta_length" (50), "batch_size" (1),
    "beam_size" (4), "length_penalty" (0).

    Args:
        decoder: object exposing ``embedding_layer`` and ``decoder`` attributes.
        log_softmax: passed to the generator as its third argument.
        **kwargs: forwarded to ``TrainableNM.__init__``.
    """
    TrainableNM.__init__(self, **kwargs)
    cfg = self.local_parameters

    search_options = dict(
        max_sequence_length=cfg["max_seq_length"],
        max_delta_length=cfg.get("max_delta_length", 50),
        pad=cfg["pad_token"],
        bos=cfg["bos_token"],
        eos=cfg["eos_token"],
        batch_size=cfg.get("batch_size", 1),
        beam_size=cfg.get("beam_size", 4),
        len_pen=cfg.get("length_penalty", 0),
    )
    self.generator = BeamSearchSequenceGenerator(
        decoder.embedding_layer, decoder.decoder, log_softmax,
        **search_options)
def __init__(self, decoder, log_softmax, max_seq_length, pad_token,
             bos_token, eos_token, batch_size=1, **kwargs):
    """Create a ``GreedySequenceGenerator`` and store it as ``self.generator``.

    Args:
        decoder: object exposing ``embedding_layer`` and ``decoder`` attributes.
        log_softmax: passed to the generator as its third argument.
        max_seq_length: forwarded as ``max_sequence_length``.
        pad_token: forwarded as ``pad``.
        bos_token: forwarded as ``bos``.
        eos_token: forwarded as ``eos``.
        batch_size: forwarded as ``batch_size``.
        **kwargs: forwarded to ``TrainableNM.__init__``.
    """
    TrainableNM.__init__(self, **kwargs)

    options = dict(
        max_sequence_length=max_seq_length,
        pad=pad_token,
        bos=bos_token,
        eos=eos_token,
        batch_size=batch_size,
    )
    self.generator = GreedySequenceGenerator(
        decoder.embedding_layer, decoder.decoder, log_softmax, **options)
def __init__(self, vocab_size, d_model, d_inner, max_seq_length, num_layers,
             num_attn_heads, ffn_dropout=0.0, embedding_dropout=0.0,
             attn_score_dropout=0.0, attn_layer_dropout=0.0,
             learn_positional_encodings=False, hidden_act='relu',
             mask_future=False, **kwargs):
    """Build the embedding layer and encoder from explicit arguments.

    Args:
        vocab_size: forwarded to the embedding layer.
        d_model: hidden size of both the embedding layer and the encoder;
            also sets the weight-init range to ``1 / sqrt(d_model)``.
        d_inner: forwarded to the encoder as ``inner_size``.
        max_seq_length: forwarded as ``max_sequence_length``.
        num_layers: forwarded to the encoder.
        num_attn_heads: forwarded as ``num_attention_heads``.
        ffn_dropout: forwarded to the encoder.
        embedding_dropout: forwarded to the embedding layer.
        attn_score_dropout: forwarded to the encoder.
        attn_layer_dropout: forwarded to the encoder.
        learn_positional_encodings: forwarded to the embedding layer.
        hidden_act: forwarded to the encoder.
        mask_future: forwarded to the encoder.
        **kwargs: forwarded to ``TrainableNM.__init__``.
    """
    TrainableNM.__init__(self, **kwargs)

    embedding_kwargs = dict(
        vocab_size=vocab_size,
        hidden_size=d_model,
        max_sequence_length=max_seq_length,
        embedding_dropout=embedding_dropout,
        learn_positional_encodings=learn_positional_encodings,
    )
    encoder_kwargs = dict(
        num_layers=num_layers,
        hidden_size=d_model,
        mask_future=mask_future,
        num_attention_heads=num_attn_heads,
        inner_size=d_inner,
        ffn_dropout=ffn_dropout,
        hidden_act=hidden_act,
        attn_score_dropout=attn_score_dropout,
        attn_layer_dropout=attn_layer_dropout,
    )
    self.embedding_layer = TransformerEmbedding(**embedding_kwargs)
    self.encoder = TransformerEncoder(**encoder_kwargs)

    init_range = 1 / math.sqrt(d_model)

    def _init_weights(module):
        return transformer_weights_init(module, init_range)

    self.apply(_init_weights)
    self.to(self._device)
def __init__(self, **kwargs):
    """Forward every keyword argument to ``TrainableNM.__init__``."""
    TrainableNM.__init__(
        self,
        **kwargs
    )
def __init__(self, **kwargs):
    """Initialize the base module and create the loss function object.

    A ``SequenceClassificationLoss`` built with no arguments is stored as
    ``self._loss_fn``.
    """
    TrainableNM.__init__(self, **kwargs)
    loss_fn = SequenceClassificationLoss()
    self._loss_fn = loss_fn
def __init__(self, mode="add", **kwargs):
    """Initialize the base module and remember the requested mode.

    Args:
        mode: stored unchanged as ``self._mode``.
        **kwargs: forwarded to ``TrainableNM.__init__``.
    """
    TrainableNM.__init__(
        self,
        **kwargs
    )
    self._mode = mode
def __init__(self, **kwargs):
    """Create a ``SmoothedCrossEntropyLoss`` from the local parameters.

    The smoothing value is read from ``self.local_parameters`` under
    "label_smoothing" and defaults to 0.0 when the key is absent.
    """
    TrainableNM.__init__(self, **kwargs)
    smoothing = self.local_parameters.get("label_smoothing", 0.0)
    self._loss_fn = SmoothedCrossEntropyLoss(smoothing)
def __init__(
    self,
    input_size: int,
    output_size: int,
    hidden_sizes: List[int] = [],
    dimensions: int = 2,
    dropout_rate: float = 0,
    name: Optional[str] = None,
):
    """
    Initializes the feed-forward network.

    Args:
        input_size: Size of input (1D); a one-element list is unwrapped
        output_size: Size of the output (1D); a one-element list is unwrapped
        hidden_sizes: Sizes of the consecutive hidden layers (DEFAULT: [] = no hidden).
            The default list is only read, never mutated.
        dimensions: Number of dimensions of input/output tensors (DEFAULT: 2 = BATCH X INPUT_SIZE)
        dropout_rate: Dropout rate (Default: 0); dropout layers are added only when > 0
        name: Name of the module (DEFAULT: None)

    Raises:
        ConfigurationError: if ``input_size`` or ``output_size`` is a list with
            a length other than one, if ``dimensions`` is smaller than two, or
            if ``hidden_sizes`` is not a list.
    """

    def _single_value(value, arg_name):
        # Accept either a plain value or a one-element list holding it.
        if not isinstance(value, list):
            return value
        if len(value) != 1:
            raise ConfigurationError(
                "'{}' must be a single value (received {})".format(
                    arg_name, value))
        return value[0]

    # Call constructor of the parent class.
    TrainableNM.__init__(self, name=name)

    # Get input size.
    self._input_size = _single_value(input_size, 'input_size')

    # Get input/output dimensions, i.e. number of axes of the input
    # [BATCH_SIZE x ... x INPUT_SIZE].
    self._dimensions = dimensions
    if self._dimensions < 2:
        # The check accepts 2, so the message says "at least two".
        raise ConfigurationError(
            "'dimensions' must be at least two (received {})".format(
                self._dimensions))

    # Get output (prediction/logits) size.
    self._output_size = _single_value(output_size, 'output_size')

    logging.info(
        "Initializing network with input size = {} and output size = {}".
        format(self._input_size, self._output_size))

    if not isinstance(hidden_sizes, list):
        raise ConfigurationError(
            "'hidden_sizes' must contain a list with numbers of neurons in consecutive hidden layers (received {})"
            .format(hidden_sizes))

    # Stack (Linear -> ReLU [-> Dropout]) once per hidden layer.
    modules = []
    input_dim = self._input_size
    for hidden_dim in hidden_sizes:
        modules.append(torch.nn.Linear(input_dim, hidden_dim))
        modules.append(torch.nn.ReLU())
        if dropout_rate > 0:
            modules.append(torch.nn.Dropout(dropout_rate))
        # Remember size: it is the input of the next layer.
        input_dim = hidden_dim

    # Add the last "output" (or in a special case: the only) layer.
    modules.append(torch.nn.Linear(input_dim, self._output_size))
    logging.info("Created {} hidden layers with sizes {}".format(
        len(hidden_sizes), hidden_sizes))

    # Finally create the sequential model out of those modules.
    self.layers = torch.nn.Sequential(*modules)
def __init__(
    self,
    input_depth: int,
    input_height: int,
    input_width: int,
    conv1_out_channels: int = 64,
    conv1_kernel_size: int = 3,
    conv1_stride: int = 1,
    conv1_padding: int = 0,
    maxpool1_kernel_size: int = 2,
    conv2_out_channels: int = 32,
    conv2_kernel_size: int = 3,
    conv2_stride: int = 1,
    conv2_padding: int = 0,
    maxpool2_kernel_size: int = 2,
    conv3_out_channels: int = 16,
    conv3_kernel_size: int = 3,
    conv3_stride: int = 1,
    conv3_padding: int = 0,
    maxpool3_kernel_size: int = 2,
    name: Optional[str] = None,
):
    """
    Constructor of a simple CNN.

    The overall structure of this CNN is as follows:

        (Conv1 -> MaxPool1 -> ReLu) -> (Conv2 -> MaxPool2 -> ReLu) -> (Conv3 -> MaxPool3 -> ReLu)

    Only the Conv and MaxPool layers are created here. The parameters that
    the user can change are:

        - For Conv1, Conv2 & Conv3: number of output channels, kernel size, stride and padding.
        - For MaxPool1, MaxPool2 & MaxPool3: kernel size.

    .. note::

        ``dilation=1``, ``groups=1`` & ``bias=True`` are always used for ``nn.Conv2d``.
        Every ``nn.MaxPool2d`` is created from its kernel size only.

    Besides building the layers, the constructor computes the spatial size
    after every layer and stores the final one in ``_feature_map_height``,
    ``_feature_map_width`` and ``_feature_map_depth``. Heights and widths
    come from ``np.floor`` and are therefore floating-point values.

    Args:
        input_depth: Depth of the input image
        input_height: Height of the input image
        input_width: Width of the input image
        convX_out_channels: Number of output channels of layer X (X=1,2,3)
        convX_kernel_size: Kernel size of layer X (X=1,2,3)
        convX_stride: Stride of layer X (X=1,2,3)
        convX_padding: Padding of layer X (X=1,2,3)
        maxpoolX_kernel_size: Kernel size of max-pooling layer X (X=1,2,3)
        name: Name of the module (DEFAULT: None)
    """

    def _out_size(size, kernel_size, stride, padding):
        # Spatial output size for input size W, receptive field F, stride S
        # and zero padding P: floor(((W - F + 2P) / S) + 1).
        return np.floor(((size - kernel_size + 2 * padding) / stride) + 1)

    def _make_conv(in_channels, out_channels, kernel_size, stride, padding):
        return nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=1,
            groups=1,
            bias=True,
        )

    # Call base constructor.
    TrainableNM.__init__(self, name=name)

    # Input image information.
    self._input_depth = input_depth
    self._input_height = input_height
    self._input_width = input_width

    # Conv1 / MaxPool1 parameters.
    self._conv1_out_channels = conv1_out_channels
    self._conv1_kernel_size = conv1_kernel_size
    self._conv1_stride = conv1_stride
    self._conv1_padding = conv1_padding
    self._maxpool1_kernel_size = maxpool1_kernel_size

    # Conv2 / MaxPool2 parameters.
    self._conv2_out_channels = conv2_out_channels
    self._conv2_kernel_size = conv2_kernel_size
    self._conv2_stride = conv2_stride
    self._conv2_padding = conv2_padding
    self._maxpool2_kernel_size = maxpool2_kernel_size

    # Conv3 / MaxPool3 parameters.
    self._conv3_out_channels = conv3_out_channels
    self._conv3_kernel_size = conv3_kernel_size
    self._conv3_stride = conv3_stride
    self._conv3_padding = conv3_padding
    self._maxpool3_kernel_size = maxpool3_kernel_size

    # doc for nn.Conv2D: https://pytorch.org/docs/stable/nn.html#torch.nn.Conv2d
    # doc for nn.MaxPool2D: https://pytorch.org/docs/stable/nn.html#torch.nn.MaxPool2d

    # ----------------------------------------------------
    # Conv1
    self._conv1 = _make_conv(
        self._input_depth, self._conv1_out_channels,
        self._conv1_kernel_size, self._conv1_stride, self._conv1_padding)
    width_features_conv1 = _out_size(
        self._input_width, self._conv1_kernel_size,
        self._conv1_stride, self._conv1_padding)
    height_features_conv1 = _out_size(
        self._input_height, self._conv1_kernel_size,
        self._conv1_stride, self._conv1_padding)

    # ----------------------------------------------------
    # MaxPool1 (stride and padding are read back from the created layer)
    self._maxpool1 = nn.MaxPool2d(kernel_size=self._maxpool1_kernel_size)
    width_features_maxpool1 = _out_size(
        width_features_conv1, self._maxpool1_kernel_size,
        self._maxpool1.stride, self._maxpool1.padding)
    height_features_maxpool1 = _out_size(
        height_features_conv1, self._maxpool1_kernel_size,
        self._maxpool1.stride, self._maxpool1.padding)

    # ----------------------------------------------------
    # Conv2
    self._conv2 = _make_conv(
        self._conv1_out_channels, self._conv2_out_channels,
        self._conv2_kernel_size, self._conv2_stride, self._conv2_padding)
    width_features_conv2 = _out_size(
        width_features_maxpool1, self._conv2_kernel_size,
        self._conv2_stride, self._conv2_padding)
    height_features_conv2 = _out_size(
        height_features_maxpool1, self._conv2_kernel_size,
        self._conv2_stride, self._conv2_padding)

    # ----------------------------------------------------
    # MaxPool2
    self._maxpool2 = nn.MaxPool2d(kernel_size=self._maxpool2_kernel_size)
    width_features_maxpool2 = _out_size(
        width_features_conv2, self._maxpool2_kernel_size,
        self._maxpool2.stride, self._maxpool2.padding)
    height_features_maxpool2 = _out_size(
        height_features_conv2, self._maxpool2_kernel_size,
        self._maxpool2.stride, self._maxpool2.padding)

    # ----------------------------------------------------
    # Conv3
    self._conv3 = _make_conv(
        self._conv2_out_channels, self._conv3_out_channels,
        self._conv3_kernel_size, self._conv3_stride, self._conv3_padding)
    width_features_conv3 = _out_size(
        width_features_maxpool2, self._conv3_kernel_size,
        self._conv3_stride, self._conv3_padding)
    height_features_conv3 = _out_size(
        height_features_maxpool2, self._conv3_kernel_size,
        self._conv3_stride, self._conv3_padding)

    # ----------------------------------------------------
    # MaxPool3
    self._maxpool3 = nn.MaxPool2d(kernel_size=self._maxpool3_kernel_size)
    width_features_maxpool3 = _out_size(
        width_features_conv3, self._maxpool3_kernel_size,
        self._maxpool3.stride, self._maxpool3.padding)
    # Fixed: the height previously used the MaxPool1 kernel size here, which
    # gave a wrong height whenever maxpool1 and maxpool3 kernels differed.
    height_features_maxpool3 = _out_size(
        height_features_conv3, self._maxpool3_kernel_size,
        self._maxpool3.stride, self._maxpool3.padding)

    # Remember the output dims.
    self._feature_map_height = height_features_maxpool3
    self._feature_map_width = width_features_maxpool3
    self._feature_map_depth = self._conv3_out_channels

    # Log info about dimensions (always depth, height, width).
    logging.info('Input shape: [-1, {}, {}, {}]'.format(
        self._input_depth, self._input_height, self._input_width))
    logging.debug('Computed output shape of each layer:')
    logging.debug(' * Conv1: [-1, {}, {}, {}]'.format(
        self._conv1_out_channels, height_features_conv1,
        width_features_conv1))
    logging.debug(' * MaxPool1: [-1, {}, {}, {}]'.format(
        self._conv1_out_channels, height_features_maxpool1,
        width_features_maxpool1))
    logging.debug(' * Conv2: [-1, {}, {}, {}]'.format(
        self._conv2_out_channels, height_features_conv2,
        width_features_conv2))
    logging.debug(' * MaxPool2: [-1, {}, {}, {}]'.format(
        self._conv2_out_channels, height_features_maxpool2,
        width_features_maxpool2))
    logging.debug(' * Conv3: [-1, {}, {}, {}]'.format(
        self._conv3_out_channels, height_features_conv3,
        width_features_conv3))
    # Fixed: height is now logged before width, like every other layer.
    logging.debug(' * MaxPool3: [-1, {}, {}, {}]'.format(
        self._conv3_out_channels, height_features_maxpool3,
        width_features_maxpool3))
    logging.info('Output shape: [-1, {}, {}, {}]'.format(
        self._feature_map_depth, self._feature_map_height,
        self._feature_map_width))
def __init__(
    self,
    model_type: str,
    output_size: Optional[int] = None,
    return_feature_maps: bool = False,
    pretrained: bool = False,
    name: Optional[str] = None,
):
    """
    Initializes the ``ImageEncoder`` model, creates the required "backbone".

    In feature-map mode the backbone is truncated and the fixed output
    feature-map dimensions are stored on the instance. Otherwise the last
    linear layer of the backbone is replaced by one with ``output_size``
    outputs.

    Args:
        model_type: Type of backbone (Options: VGG16 | DenseNet121 | ResNet152 | ResNet50)
        output_size: Size of the output layer (Optional, Default: None)
        return_feature_maps: Return mode: image embeddings vs feature maps (Default: False)
        pretrained: Loads pretrained model (Default: False)
        name: Name of the module (DEFAULT: None)

    Raises:
        ConfigurationError: for 'densenet121' combined with ``return_feature_maps``.
    """
    TrainableNM.__init__(self, name=name)

    # Operation mode.
    self._return_feature_maps = return_feature_maps

    # Model type, resolved against the list of supported backbones.
    supported_types = "vgg16 | densenet121 | resnet152 | resnet50".split(" | ")
    self._model_type = get_value_from_dictionary(model_type, supported_types)

    # Output size (optional - not used in feature-map mode).
    self._output_size = output_size

    backbone = self._model_type
    if backbone == 'vgg16':
        self._model = models.vgg16(pretrained=pretrained)
        if not self._return_feature_maps:
            # Whole model, with the last layer ("FC6") re-instantiated.
            self._model.classifier._modules['6'] = torch.nn.Linear(
                4096, self._output_size)
        else:
            # Only the "feature encoder" part.
            self._model = self._model.features
            self._feature_map_height = 7
            self._feature_map_width = 7
            self._feature_map_depth = 512

    elif backbone == 'densenet121':
        self._model = models.densenet121(pretrained=pretrained)
        if self._return_feature_maps:
            raise ConfigurationError("'densenet121' doesn't support 'return_feature_maps' mode (yet)")
        # Whole model, with the classifier layer re-instantiated.
        self._model.classifier = torch.nn.Linear(1024, self._output_size)

    elif backbone in ('resnet152', 'resnet50'):
        # Both ResNet variants are handled identically.
        if backbone == 'resnet152':
            self._model = models.resnet152(pretrained=pretrained)
        else:
            self._model = models.resnet50(pretrained=pretrained)

        if not self._return_feature_maps:
            # Whole model, with the final fully connected layer re-instantiated.
            self._model.fc = torch.nn.Linear(2048, self._output_size)
        else:
            # All modules excluding the last two (avgpool and fc).
            kept_modules = list(self._model.children())[:-2]
            self._model = torch.nn.Sequential(*kept_modules)
            self._feature_map_height = 7
            self._feature_map_width = 7
            self._feature_map_depth = 2048
def __str__(self):
    """Return the ``TrainableNM`` string, prefixed by ``self.name`` when it is truthy."""
    base_repr = TrainableNM.__str__(self)
    return self.name + base_repr if self.name else base_repr