def __init__(self, config=None): super().__init__() config = config if config is not None else self._get_default_config() self.config = config self.network_height = len(config['num_filters']) # downsampling blocks self.conv_down = nn.ModuleDict() self.fgru_down = nn.ModuleDict() self.pool = nn.ModuleDict() for i in range(self.network_height - 1): in_c = config['in_channels'] if i == 0 else config['num_filters'][ i - 1] blk = self._conv_block(in_c, config['num_filters'][i], kernel_size=config['conv_kernel_size'][i], blocksize=config['conv_blocksize'][i], normtype=config['conv_normtype'], dropout_p=config['conv_dropout_p'], name='') self.conv_down[str(i)] = blk fgru_cell = fConvGRUCell( config['num_filters'][i], config['fgru_hidden_size'][i], config['fgru_kernel_size'][i], config['fgru_timesteps'], config['fgru_normtype'], config['fgru_channel_sym'], config['fgru_attention_args']) self.fgru_down[str(i)] = fgru_cell self.pool[str(i)] = nn.MaxPool2d(kernel_size=2, stride=2) # bottleneck self.conv_bottleneck = self._conv_block( config['num_filters'][-2], config['num_filters'][-1], kernel_size=config['conv_kernel_size'][-1], blocksize=config['conv_blocksize'][-1], normtype=config['conv_normtype'], dropout_p=config['conv_dropout_p']) self.fgru_bottleneck = fConvGRUCell( config['num_filters'][-1], config['fgru_hidden_size'][-1], config['fgru_kernel_size'][self.network_height - 1], config['fgru_timesteps'], config['fgru_normtype'], config['fgru_channel_sym'], config['fgru_attention_args']) # upsampling blocks self.upsample = nn.ModuleDict() self.ups_conv = nn.ModuleDict() self.conv_up = nn.ModuleDict() self.fgru_up = nn.ModuleDict() for i in range(self.network_height - 2, -1, -1): # 2nd-to-deepest to first level # upsampling operations if config['upsample_mode'] == 'transpose': if config['upsample_all2all']: raise NotImplementedError( 'Transpose mode does not support all-to-all') self.upsample[str(i)] = nn.ConvTranspose2d( config['num_filters'][i + 1], config['num_filters'][i], kernel_size=2, stride=2) else: # ups_out_dims = tuple( # np.array(config['in_dims'][:2]) // (2 ** i)) if config[ 'upsample_all2all']: # will concat fgru act from all layers below ups_in_channels = [config['num_filters'][i + 1]] for j in range(i + 1, self.network_height): ups = nn.Upsample(scale_factor=2**(j - i), mode=config['upsample_mode'], align_corners=False) self.upsample["{}-{}".format(j, i)] = ups ups_in_channels += [config['num_filters'][j]] ups_in_channels = sum(ups_in_channels) else: ups = nn.Upsample(scale_factor=2, mode=config['upsample_mode'], align_corners=False) self.upsample["{}-{}".format(i + 1, i)] = ups ups_in_channels = config['num_filters'][i + 1] self.ups_conv[str(i)] = nn.Conv2d(ups_in_channels, config['num_filters'][i], kernel_size=1) # conv block blk = self._conv_block( config['num_filters'][i] * 2, # concat'd skip activity config['num_filters'][i], kernel_size=config['conv_kernel_size'][i], blocksize=config['conv_blocksize'][i], normtype=config['conv_normtype'], dropout_p=config['conv_dropout_p'], name='') self.conv_up[str(i)] = blk # fgru fgru_cell = fConvGRUCell( config['num_filters'][i], config['fgru_hidden_size'][i], config['fgru_kernel_size'][(self.network_height * 2 - 2) - i], config['fgru_timesteps'], config['fgru_normtype'], config['fgru_channel_sym'], config['fgru_attention_args']) self.fgru_up[str(i)] = fgru_cell
def __init__(self, params, dico, is_encoder, with_output): """ Transformer model (encoder or decoder). """ super().__init__() # encoder / decoder, output layer self.is_encoder = is_encoder self.is_decoder = not is_encoder self.with_output = with_output # dictionary / languages self.n_langs = params.n_langs self.n_words = params.n_words self.eos_index = params.eos_index self.pad_index = params.pad_index self.dico = dico self.id2lang = params.id2lang self.lang2id = params.lang2id self.use_lang_emb = getattr(params, 'use_lang_emb', True) assert len(self.dico) == self.n_words assert len(self.id2lang) == len(self.lang2id) == self.n_langs # model parameters self.dim = params.emb_dim # 512 by default self.hidden_dim = self.dim * 4 # 2048 by default self.n_heads = params.n_heads # 8 by default self.n_layers = params.n_layers self.dropout = params.dropout self.attention_dropout = params.attention_dropout assert self.dim % self.n_heads == 0, 'transformer dim must be a multiple of n_heads' # embeddings self.position_embeddings = Embedding(N_MAX_POSITIONS, self.dim) if params.sinusoidal_embeddings: create_sinusoidal_embeddings(N_MAX_POSITIONS, self.dim, out=self.position_embeddings.weight) if params.n_langs > 1 and self.use_lang_emb: self.lang_embeddings = Embedding(self.n_langs, self.dim) self.embeddings = Embedding(self.n_words, self.dim, padding_idx=self.pad_index) self.layer_norm_emb = nn.LayerNorm(self.dim, eps=1e-12) # transformer layers self.attentions = nn.ModuleList() self.layer_norm1 = nn.ModuleList() self.ffns = nn.ModuleList() self.layer_norm2 = nn.ModuleList() if self.is_decoder: self.layer_norm15 = nn.ModuleList() self.encoder_attn = nn.ModuleList() # memories self.memories = nn.ModuleDict() if getattr(params, 'use_memory', False): mem_positions = params.mem_enc_positions if is_encoder else params.mem_dec_positions for layer_id, pos in mem_positions: assert 0 <= layer_id <= params.n_layers - 1 assert pos in ['in', 'after'] self.memories['%i_%s' % (layer_id, pos)] = HashingMemory.build(self.dim, self.dim, params) for layer_id in range(self.n_layers): self.attentions.append(MultiHeadAttention(self.n_heads, self.dim, dropout=self.attention_dropout)) self.layer_norm1.append(nn.LayerNorm(self.dim, eps=1e-12)) if self.is_decoder: self.layer_norm15.append(nn.LayerNorm(self.dim, eps=1e-12)) self.encoder_attn.append(MultiHeadAttention(self.n_heads, self.dim, dropout=self.attention_dropout)) if ('%i_in' % layer_id) in self.memories: self.ffns.append(None) else: self.ffns.append(TransformerFFN(self.dim, self.hidden_dim, self.dim, dropout=self.dropout, gelu_activation=params.gelu_activation)) self.layer_norm2.append(nn.LayerNorm(self.dim, eps=1e-12)) # output layer if self.with_output: self.pred_layer = PredLayer(params) if params.share_inout_emb: self.pred_layer.proj.weight = self.embeddings.weight self.use_positional_embedding = params.use_positional_embedding
def __init__(self, wavenumbers=None, param_file=None, dtype=torch.float64, device='cuda'):
    """
    wavenumbers: torch tensor, emissivity is evaluated at each wavenumber passed
    n_freqs: int, number of resonant frequencies in the system
    """
    super().__init__()
    if wavenumbers is None or param_file is None:
        print('must initialize InfraRenderProject with wavenumbers and param file')
        return

    # setup convolutional layers
    self.device = device
    self.dtype = dtype
    self.wavenumbers = wavenumbers
    self.mixture_model = InverseRenderMixtureModel(paramFile=param_file,
                                                   wavenumbers=wavenumbers,
                                                   dtype=dtype,
                                                   device=device)
    self.conv1 = nn.Conv1d(1, 3, 5)
    self.conv2 = nn.Conv1d(3, 6, 4)
    self.conv3 = nn.Conv1d(6, 12, 5)
    self.conv4 = nn.Conv1d(12, 24, 4)
    self.pool = nn.MaxPool1d(2)

    # use dummy to compute size of convolutional layer output
    with torch.no_grad():
        dummy = torch.empty((1, wavenumbers.shape[0]))
        dummy = self.convolutions(dummy)
        dummy = torch.flatten(dummy, start_dim=1)
    self.fc1 = nn.Linear(dummy.shape[1], 150)
    self.fc2 = nn.Linear(150, 150)

    # setup fully connected layers and renderer
    self.fc_freqs_dict = nn.ModuleDict()
    self.fc_gammas_dict = nn.ModuleDict()
    self.fc_rhos_dict = nn.ModuleDict()
    self.fc_epsilon_dict = nn.ModuleDict()
    self.fc_mode_weight_dict = nn.ModuleDict()
    for key, endmember in self.mixture_model.endmemberModels.items():
        self.fc_freqs_dict[key] = nn.ModuleList()
        self.fc_gammas_dict[key] = nn.ModuleList()
        self.fc_rhos_dict[key] = nn.ModuleList()
        self.fc_epsilon_dict[key] = nn.ModuleList()
        self.fc_mode_weight_dict[key] = nn.ModuleList()
        for mode_idx, mode in enumerate(endmember.modes):
            self.fc_freqs_dict[key].append(nn.Linear(150, mode.freqs.shape[0]))
            self.fc_gammas_dict[key].append(nn.Linear(150, mode.gammas.shape[0]))
            self.fc_rhos_dict[key].append(nn.Linear(150, mode.rhos.shape[0]))
            self.fc_epsilon_dict[key].append(nn.Linear(150, 1))
            self.fc_mode_weight_dict[key].append(nn.Linear(150, 1))
    self.fc_abundances = nn.Linear(150, len(self.mixture_model.endmemberModels))
    self.endmemberSpectra = None
    self.abundances = None
    self.pred_spectra = None
    self.mse = torch.nn.MSELoss(reduction='mean')
def __init__(self, in_size, out_size, etypes):
    super(HeteroRGCNLayer, self).__init__()
    # W_r for each relation
    self.weight = nn.ModuleDict({
        name: nn.Linear(in_size, out_size) for name in etypes
    })
def create_task(args, entity_symbols=None, slice_datasets=None): """Returns an EmmentalTask for named entity disambiguation (NED). Args: args: args entity_symbols: entity symbols (default None) slice_datasets: slice datasets used in scorer (default None) Returns: EmmentalTask for NED """ if entity_symbols is None: entity_symbols = EntitySymbols.load_from_cache( load_dir=os.path.join(args.data_config.entity_dir, args.data_config.entity_map_dir), alias_cand_map_file=args.data_config.alias_cand_map, alias_idx_file=args.data_config.alias_idx_map, ) # Create sentence encoder bert_model = BertEncoder(args.data_config.word_embedding, output_size=args.model_config.hidden_size) # Gets the tasks that query for the individual embeddings (e.g., word, entity, type, kg) # The device dict will store which embedding modules we want on the cpu ( embedding_task_flows, # task flows for standard embeddings (e.g., kg, type, entity) embedding_module_pool, # module for standard embeddings embedding_module_device_dict, # module device dict for standard embeddings # some embeddings output indices for BERT so we handle these embeddings in our BERT layer # (see comments in get_through_bert_embedding_tasks) extra_bert_embedding_layers, embedding_payload_inputs, # the layers that are fed into the payload embedding_total_sizes, # total size of all embeddings ) = get_embedding_tasks(args, entity_symbols) # Add the extra embedding layers to BERT module for emb_obj in extra_bert_embedding_layers: bert_model.add_embedding(emb_obj) # Create the embedding payload, attention network, and prediction layer modules if args.model_config.attn_class == "BootlegM2E": embedding_payload = EmbeddingPayload(args, entity_symbols, embedding_total_sizes) attn_network = BootlegM2E(args, entity_symbols) pred_layer = PredictionLayer(args) elif args.model_config.attn_class == "Bootleg": embedding_payload = EmbeddingPayload(args, entity_symbols, embedding_total_sizes) attn_network = Bootleg(args, entity_symbols) pred_layer = PredictionLayer(args) elif args.model_config.attn_class == "BERTNED": # Baseline model embedding_payload = EmbeddingPayloadBase(args, entity_symbols, embedding_total_sizes) attn_network = BERTNED(args, entity_symbols) pred_layer = NoopPredictionLayer(args) else: raise ValueError(f"{args.model_config.attn_class} is not supported.") sliced_scorer = BootlegSlicedScorer(args.data_config.train_in_candidates, slice_datasets) # Create module pool and combine with embedding module pool module_pool = nn.ModuleDict({ BERT_MODEL_NAME: bert_model, "embedding_payload": embedding_payload, "attn_network": attn_network, PRED_LAYER: pred_layer, }) module_pool.update(embedding_module_pool) # Create task flow task_flow = [ { "name": BERT_MODEL_NAME, "module": BERT_MODEL_NAME, "inputs": [ ("_input_", "entity_cand_eid"), ("_input_", "token_ids"), ], # We pass the entity_cand_eids to BERT in case of embeddings that require word information }, *embedding_task_flows, # Add task flows to create embedding inputs { "name": "embedding_payload", "module": "embedding_payload", # outputs: embedding_tensor "inputs": [ ("_input_", "start_span_idx"), ("_input_", "end_span_idx"), *embedding_payload_inputs, # all embeddings ], }, { "name": "attn_network", "module": "attn_network", # output: predictions from layers, output entity embeddings "inputs": [ (BERT_MODEL_NAME, 0), # sentence embedding (BERT_MODEL_NAME, 1), # sentence embedding mask ("embedding_payload", 0), ("_input_", "entity_cand_eid_mask"), ("_input_", "start_span_idx"), ("_input_", 
"end_span_idx"), ( "_input_", "batch_on_the_fly_kg_adj", ), # special kg adjacency embedding prepped in dataloader ], }, { "name": PRED_LAYER, "module": PRED_LAYER, "inputs": [ ( "attn_network", "intermed_scores", ), # output predictions from intermediate layers from the model ( "attn_network", "ent_embs", ), # output entity embeddings (from all KG modules) ( "attn_network", "final_scores", ), # score (empty except for baseline model) ], }, ] return EmmentalTask( name=NED_TASK, module_pool=module_pool, task_flow=task_flow, loss_func=disambig_loss, output_func=disambig_output, require_prob_for_eval=False, require_pred_for_eval=True, # action_outputs are used to stitch together sentence fragments action_outputs=[ ("_input_", "sent_idx"), ("_input_", "subsent_idx"), ("_input_", "alias_orig_list_pos"), ("_input_", "for_dump_gold_cand_K_idx_train"), (PRED_LAYER, "ent_embs"), # entity embeddings ], scorer=Scorer(customize_metric_funcs={ f"{NED_TASK}_scorer": sliced_scorer.bootleg_score }), module_device=embedding_module_device_dict, )
def _rebuild_module_dict(self):
    # Re-wrap the plain dict of sub-networks in an nn.ModuleDict so their
    # parameters are registered with (and tracked by) the parent module.
    self.nets = nn.ModuleDict(self._nets)
def __init__(self,
             n_nets=3,
             # default scale ratio: 2
             #
             # EXAMPLE with three modules and the last with input shape = (64, 64, 64)
             # First net input (256, 256, 256) -> downsample -> (64, 64, 64)
             # Second net input (128, 128, 128) -> downsample -> (64, 64, 64)
             # Third net input (64, 64, 64)
             #
             # i.e. scale_ratio=2, n_nets=3: 256 -> 128 -> 64
             #      scale_ratio=4, n_nets=2: 256 -> 64
             scale_ratio=2,
             module_shape=(64, 64, 64),
             input_shapes=None,
             initial_ds=None,
             crop_and_ds_inputs=False,
             crop_and_us_outputs=True,
             in_channels=1,
             out_channels=None,
             num_inputs=1,
             filter_sizes_down=(((4, 8), (8, 16), (16, 32)),
                                ((8, 16), (16, 32), (32, 64)),
                                ((32, 64), (64, 128), (128, 256))),
             filter_sizes_bottleneck=((32, 64), (64, 128), (256, 512)),
             filter_sizes_up=(((32, 32), (16, 16), (8, 8)),
                              ((64, 64), (32, 32), (16, 16)),
                              ((256, 256), (128, 128), (64, 64))),
             batch_norm=True,
             output_activation='softmax',
             verbose=False):
    super(cNnet, self).__init__()

    # ______________________________
    # Parameters and settings

    # Assertions
    assert out_channels is not None, 'The number of output classes for each module needs to be specified!'
    assert in_channels is not None, 'The number of input channels needs to be specified!'
    assert len(filter_sizes_down) == n_nets
    assert len(filter_sizes_up) == n_nets
    assert len(filter_sizes_bottleneck) == n_nets

    # ______________________________
    # Define layers

    self.avg_pool_inputs = nn.ModuleDict()
    self.unets = nn.ModuleList()

    for net_idx in range(n_nets):
        if crop_and_ds_inputs:
            if 0 < net_idx < n_nets - 1:
                # First, the tensor will be cropped and then downsampled.
                # nn.ModuleDict keys must be strings, so cast the index with str().
                self.avg_pool_inputs[str(net_idx)] = nn.AvgPool3d(
                    kernel_size=(scale_ratio ** (n_nets - net_idx - 1),) * 3)
        self.unets.append(
            Unet(num_classes=out_channels[net_idx],
                 in_channels=in_channels + out_channels[net_idx],
                 filter_sizes_down=filter_sizes_down[net_idx],
                 filter_sizes_up=filter_sizes_up[net_idx],
                 # indexed per net, matching filter_sizes_down/filter_sizes_up
                 filter_sizes_bottleneck=filter_sizes_bottleneck[net_idx],
                 kernel_size=3,
                 batch_norm=batch_norm,
                 ndims=3,
                 return_last_upsampling=True,
                 output_activation=output_activation))
def __init__(self, layer_choice):
    super(DartsLayerChoice, self).__init__()
    self.name = layer_choice.label
    self.op_choices = nn.ModuleDict(OrderedDict([(name, layer_choice[name])
                                                 for name in layer_choice.names]))
    self.alpha = nn.Parameter(torch.randn(len(self.op_choices)) * 1e-3)
def __init__(self, input_list, output_info, fusion_lists, nonlinearity=nn.ReLU()): super(VIN, self).__init__() self.input_list = input_list # This output_info is only for the level0 outputs, i.e., outputs from each input type separately self.output_info = output_info self.fusion_lists = fusion_lists self.num_inputs = len(input_list) self.num_levels = len(fusion_lists) if isinstance(output_info, dict): self.num_targets = 1 elif isinstance(output_info, list): self.num_targets = len(output_info) assert self.num_targets > 1 else: raise ValueError( f'output_info must be either a dict (one target) or a list (multiple targets), ' f'but is {type(output_info)}') self.weights = nn.ParameterDict() self.layers = nn.ModuleDict() # Embed discrete variables with nn.Embedding self.input_embeddings = nn.ModuleDict() # self.repr_dims stores the dimensions of the learned representations from each level; # it will be used when combining all the learned representations self.repr_dims = [[]] # self.repr_locations is only used for fusing repr_list with fusion_type='repr-loss-avg' # because there can be more outputs than latent representations in a level (each output is associated a loss), # len(self.weights['level{i}[_target{t}]_loss_weight']) can be bigger than len(repr_list) # use self.repr_locations to specify the correspondance between loss weight and view weight (through subscript) self.repr_locations = [[]] # provide default parameters for in_dict default_dict = { 'padding_idx': 0, 'max_norm': 1, 'norm_type': 2, 'scale_grad_by_freq': True, 'last_nonlinearity': False, 'bias': False, 'dense': True, 'residual': False, 'residual_layers': 'all' } for i, in_dict in enumerate(input_list): # This is used to produce the learned vector representations from all the input data types individually in_dim = in_dict['in_dim'] in_type = in_dict['in_type'] hidden_dim = in_dict['hidden_dim'] # hidden_dim is a list self.repr_dims[0].append(hidden_dim[-1]) self.repr_locations[0].append(i) # in case in_dict does not contain all required keys, append them from default values append_dict(in_dict, default_dict) if in_type == 'discrete': # If padding_idx=0 (for missing values), the index for a discrete variable should start from 1 self.input_embeddings[str(i)] = torch.nn.Embedding( num_embeddings=in_dim if in_dict['padding_idx'] is None else in_dim + 1, embedding_dim=in_dict['embedding_dim'], padding_idx=in_dict['padding_idx'], max_norm=in_dict['max_norm'], norm_type=in_dict['norm_type'], scale_grad_by_freq=in_dict['scale_grad_by_freq'], sparse=False, _weight=None) in_dim = in_dict['embedding_dim'] else: assert in_type == 'continuous', ( f'Currently only handle discrete or continous input type, ' f'but {i}th in_type is {in_type}!') self.layers[f'input{i}_hidden_layers'] = DenseLinear( in_dim=in_dim, hidden_dim=hidden_dim, nonlinearity=nonlinearity, last_nonlinearity=in_dict['last_nonlinearity'], bias=in_dict['bias'], dense=in_dict['dense'], residual=in_dict['residual'], residual_layers=in_dict['residual_layers'], forward_input=False, return_all=False, return_layers=None, return_list=False) # provide default parameters for output_info and fusion_lists default_dict = { 'last_nonlinearity': False, 'bias': False, 'dense': True, 'residual': False, 'residual_layers': 'all-but-last' } if self.num_targets == 1: # Generate level0 outputs from each input using their high-level representations with DenseLinear model; # For code simplicity, make output_layers from all views have the same hidden_dim # output_info is a dictionary hidden_dim = 
output_info['hidden_dim'] self.out_dim = hidden_dim[-1] append_dict( output_info, default_dict ) # provide default values in case they are missing in output_info for i, in_dim in enumerate(self.repr_dims[0]): # For coding simplicity, the output layers from all views will have the same hidden_dim self.layers[f'input{i}_output_layers'] = DenseLinear( in_dim=in_dim, hidden_dim=hidden_dim, nonlinearity=nonlinearity, last_nonlinearity=output_info['last_nonlinearity'], bias=output_info['bias'], dense=output_info['dense'], residual=output_info['residual'], residual_layers=output_info['residual_layers'], forward_input=False, return_all=False, return_layers=None, return_list=False) else: # self.num_targets > 1 # For each target, generate level0 outputs from each input # using their high-level representations with DenseLinear model; self.out_dims = [] # output_info is a list of dictionaries # self.num_targets == len(output_info) for j, out_dict in enumerate(output_info): hidden_dim = out_dict['hidden_dim'] # it is a list self.out_dims.append(hidden_dim[-1]) append_dict( out_dict, default_dict ) # provide default values in case they are missing in out_dict for i, in_dim in enumerate(self.repr_dims[0]): # For each target, compute an output from each input type self.layers[ f'input{i}_target{j}_output_layers'] = DenseLinear( in_dim=in_dim, hidden_dim=hidden_dim, nonlinearity=nonlinearity, last_nonlinearity=out_dict['last_nonlinearity'], bias=out_dict['bias'], dense=out_dict['dense'], residual=out_dict['residual'], residual_layers=out_dict['residual_layers'], forward_input=False, return_all=False, return_layers=None, return_list=False) for level, fusion_list in enumerate(fusion_lists): num_outputs = self.num_inputs if level == 0 else len( fusion_lists[level - 1]) # loss weight at each level if self.num_targets == 1: self.weights[f'fusion{level}_loss_weight'] = nn.Parameter( torch.empty(num_outputs), requires_grad=True) nn.init.constant_(self.weights[f'fusion{level}_loss_weight'], 1.) else: for t in range(self.num_targets): self.weights[ f'fusion{level}_target{t}_loss_weight'] = nn.Parameter( torch.empty(num_outputs), requires_grad=True) nn.init.constant_( self.weights[f'fusion{level}_target{t}_loss_weight'], 1.) 
new_repr_dim = [] new_repr_location = [] for i, fusion_dict in enumerate(fusion_list): fusion_type = fusion_dict['fusion_type'] append_dict( fusion_dict, default_dict ) # provide default values in case they are missing in fusion_dict if fusion_type.startswith('repr'): # learn a new hidden representations from fused representations # prepare in_dim if re.search('avg', fusion_type): for d in self.repr_dims[-1]: assert d == self.repr_dims[-1][0] in_dim = self.repr_dims[-1][0] elif re.search('cat', fusion_type): in_dim = sum(self.repr_dims[-1]) elif re.search('repr[0-9]', fusion_type): in_dim = self.repr_dims[-1][int( fusion_type[4:] )] # here, in most cases, fusion_type='repr0' else: raise ValueError( f'fusion_type={fusion_type} starting with repr must be repr0 ' f'or contain either avg or cat') hidden_dim = fusion_dict['hidden_dim'] # a list of ints if re.search('_repr', fusion_type): new_repr_dim.append(hidden_dim[-1]) new_repr_location.append(i) self.layers[ f'fusion{level}-{i}_hidden_layers'] = DenseLinear( in_dim, hidden_dim, nonlinearity=nonlinearity, last_nonlinearity=fusion_dict['last_nonlinearity'], bias=fusion_dict['bias'], dense=fusion_dict['dense'], residual=fusion_dict['residual'], residual_layers=fusion_dict['residual_layers'], forward_input=False, return_all=False, return_layers=None, return_list=False) # output_info is a dictionary if self.num_targets==1 else a list of dictionaries output_info = fusion_dict['output_info'] if self.num_targets == 1: append_dict( output_info, default_dict ) # provide default values in case they are missing in output_info # initialize view weights if fusion_type.startswith('repr-weighted-avg'): self.weights[ f'fusion{level}_view_weight'] = nn.Parameter( torch.empty(len(self.repr_dims[-1])), requires_grad=True) nn.init.constant_( self.weights[f'fusion{level}_view_weight'], 1.) self.layers[ f'fusion{level}-{i}_output_layers'] = DenseLinear( in_dim=hidden_dim[-1], hidden_dim=output_info['hidden_dim'], nonlinearity=nonlinearity, last_nonlinearity=output_info[ 'last_nonlinearity'], bias=output_info['bias'], dense=output_info['dense'], residual=output_info['residual'], residual_layers=output_info['residual_layers'], forward_input=False, return_all=False, return_layers=None, return_list=False) else: if fusion_type.startswith('repr-weighted-avg'): for t in range(self.num_targets): self.weights[ f'fusion{level}_target{t}_view_weight'] = nn.Parameter( torch.empty(len(self.repr_dims[-1])), requires_grad=True) nn.init.constant_( self.weights[ f'fusion{level}_target{t}_view_weight'], 1.) for t, out_dict in enumerate(output_info): append_dict( out_dict, default_dict ) # provide default values in case they are missing in out_dict self.layers[ f'fusion{level}-{i}_target{t}_output_layers'] = DenseLinear( in_dim=hidden_dim[-1], hidden_dim=out_dict['hidden_dim'], nonlinearity=nonlinearity, last_nonlinearity=out_dict[ 'last_nonlinearity'], bias=out_dict['bias'], dense=out_dict['dense'], residual=out_dict['residual'], residual_layers=out_dict[ 'residual_layers'], forward_input=False, return_all=False, return_layers=None, return_list=False) elif fusion_type.startswith('out'): if self.num_targets == 1: if fusion_type == 'out-weighted-avg': self.weights[ f'fusion{level}_out_weight'] = nn.Parameter( torch.empty(num_outputs), requires_grad=True) nn.init.constant_( self.weights[f'fusion{level}_out_weight'], 1.) 
else: if fusion_type.startswith('out-weighted-avg'): for t in range(self.num_targets): self.weights[ f'fusion{level}_target{t}_out_weight'] = nn.Parameter( torch.empty(num_outputs), requires_grad=True) nn.init.constant_( self.weights[ f'fusion{level}_target{t}_out_weight'], 1.) else: raise ValueError( f'fusion_type must start with repr or out, but is {fusion_type}' ) self.repr_dims.append(new_repr_dim) self.repr_locations.append(new_repr_location) # the loss weight for the last level; not used in almost all the cases; mainly used to avoid error in get_vin_loss if self.num_targets == 1: self.weights[ f'fusion{self.num_levels}_loss_weight'] = nn.Parameter( torch.empty(len(fusion_lists[-1])), requires_grad=True) nn.init.constant_( self.weights[f'fusion{self.num_levels}_loss_weight'], 1.) else: for t in range(self.num_targets): self.weights[ f'fusion{self.num_levels}_target{t}_loss_weight'] = nn.Parameter( torch.empty(len(fusion_lists[-1])), requires_grad=True) nn.init.constant_( self. weights[f'fusion{self.num_levels}_target{t}_loss_weight'], 1.)
def __init__(self, dataset, n_layers, in_channels=3, channels=16, n_nodes=4, retrain=False,
             shared_modules=None):
    super().__init__()
    assert dataset in ["cifar10", "imagenet"]
    self.dataset = dataset
    self.input_size = 32 if dataset == "cifar10" else 224
    self.in_channels = in_channels
    self.channels = channels
    self.n_nodes = n_nodes
    self.aux_size = {2 * n_layers // 3: self.input_size // 4}
    if dataset == "cifar10":
        self.n_classes = 10
        self.aux_head_class = AuxiliaryHeadCIFAR if retrain else DistillHeadCIFAR
        if not retrain:
            self.aux_size = {n_layers // 3: 6, 2 * n_layers // 3: 6}
    elif dataset == "imagenet":
        self.n_classes = 1000
        self.aux_head_class = AuxiliaryHeadImageNet if retrain else DistillHeadImagenet
        if not retrain:
            self.aux_size = {n_layers // 3: 6, 2 * n_layers // 3: 5}
    self.n_layers = n_layers
    self.aux_head = nn.ModuleDict()
    self.ensemble_param = nn.Parameter(torch.rand(len(self.aux_size) + 1) / (len(self.aux_size) + 1)) \
        if not retrain else None

    stem_multiplier = 3 if dataset == "cifar10" else 1
    c_cur = stem_multiplier * self.channels
    self.shared_modules = {}  # do not wrap with ModuleDict
    if shared_modules is not None:
        self.stem = shared_modules["stem"]
    else:
        self.stem = nn.Sequential(
            nn.Conv2d(in_channels, c_cur, 3, 1, 1, bias=False),
            nn.BatchNorm2d(c_cur)
        )
    self.shared_modules["stem"] = self.stem

    # for the first cell, stem is used for both s0 and s1
    # [!] channels_pp and channels_p are output channel sizes, but c_cur is the input channel size.
    channels_pp, channels_p, c_cur = c_cur, c_cur, channels

    self.cells = nn.ModuleList()
    reduction_p, reduction = False, False
    aux_head_count = 0
    for i in range(n_layers):
        reduction_p, reduction = reduction, False
        if i in [n_layers // 3, 2 * n_layers // 3]:
            c_cur *= 2
            reduction = True
        cell = Cell(n_nodes, channels_pp, channels_p, c_cur, reduction_p, reduction)
        self.cells.append(cell)
        c_cur_out = c_cur * n_nodes
        if i in self.aux_size:
            if shared_modules is not None:
                self.aux_head[str(i)] = shared_modules["aux" + str(aux_head_count)]
            else:
                self.aux_head[str(i)] = self.aux_head_class(c_cur_out, self.aux_size[i], self.n_classes)
            self.shared_modules["aux" + str(aux_head_count)] = self.aux_head[str(i)]
            aux_head_count += 1
        channels_pp, channels_p = channels_p, c_cur_out

    self.gap = nn.AdaptiveAvgPool2d(1)
    self.linear = nn.Linear(channels_p, self.n_classes)
# 4.1.2.2 The ModuleList class
net = nn.ModuleList([nn.Linear(784, 256), nn.ReLU()])
net.append(nn.Linear(256, 10))  # append, just like a Python list
print(net[-1])  # indexed access, just like a list
print(net)
# net(torch.zeros(1, 784))  # raises NotImplementedError
# The error occurs because ModuleList is merely a list that stores modules; the modules
# have no connection to each other and no implied order, so there is no guarantee that
# adjacent layers' input/output dimensions match, and no forward() (forward pass) is
# implemented, hence the error.

# The ModuleDict class
# ModuleDict takes a dict of submodules as input and supports dict-like add/access operations:
net = nn.ModuleDict({
    'linear': nn.Linear(784, 256),
    'act': nn.ReLU(),
})
net['output'] = nn.Linear(256, 10)  # add an entry
print(net['linear'])  # access an entry
print(net.output)
print(net)
# net(torch.zeros(1, 784))  # raises NotImplementedError
# (a ModuleDict wrapped in a module with an explicit forward is sketched below)

# 4.1.3 Building more complex models
class FancyMLP(nn.Module):
    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)
        self.rand_weight = torch.rand((20, 20),
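# A small self-contained sketch (mirroring the ModuleDict example above) showing how the
# container can be wrapped in an nn.Module that supplies the missing forward(), so calling
# the model no longer raises NotImplementedError.
import torch
from torch import nn

class DictMLP(nn.Module):
    def __init__(self):
        super().__init__()
        # Same layers as the ModuleDict example above; forward() fixes the order explicitly.
        self.net = nn.ModuleDict({
            'linear': nn.Linear(784, 256),
            'act': nn.ReLU(),
            'output': nn.Linear(256, 10),
        })

    def forward(self, x):
        x = self.net['linear'](x)
        x = self.net['act'](x)
        return self.net['output'](x)

print(DictMLP()(torch.zeros(1, 784)).shape)  # torch.Size([1, 10])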
def __init__(self, normalize=False):
    super().__init__()
    self.normalize = normalize
    self.losses = nn.ModuleDict()
    self.weights = {}
    self.values = {}
def __init__(self, global_args, network_args, loss_func):
    super(NetworkABC, self).__init__()
    self.global_args = global_args
    self.network_args = network_args
    self.loss_func = loss_func
    self.net = nn.ModuleDict()
def __init__(self, args, generator, discriminator, gen_optim, disc_optim, train_loader, val_loader, loss_funcs, gen_scheduler=None, disc_scheduler=None): self.logger = get_logger(name=__name__, save_file=args.log_path / args.run_name) # Checking whether inputs are correct. assert isinstance(generator, nn.Module) and isinstance(discriminator, nn.Module), \ '`generator` and `discriminator` must be Pytorch Modules.' assert isinstance(gen_optim, optim.Optimizer) and isinstance(disc_optim, optim.Optimizer), \ '`gen_optim` and `disc_optim` must be Pytorch Optimizers.' assert isinstance(train_loader, DataLoader) and isinstance(val_loader, DataLoader), \ '`train_loader` and `val_loader` must be Pytorch DataLoader objects.' loss_funcs = nn.ModuleDict( loss_funcs ) # Expected to be a dictionary with names and loss functions. if gen_scheduler is not None: if isinstance(gen_scheduler, optim.lr_scheduler.ReduceLROnPlateau): self.metric_gen_scheduler = True elif isinstance(gen_scheduler, optim.lr_scheduler._LRScheduler): self.metric_gen_scheduler = False else: raise TypeError( '`gen_scheduler` must be a Pytorch Learning Rate Scheduler.' ) if disc_scheduler is not None: if isinstance(disc_scheduler, optim.lr_scheduler.ReduceLROnPlateau): self.metric_disc_scheduler = True elif isinstance(disc_scheduler, optim.lr_scheduler._LRScheduler): self.metric_disc_scheduler = False else: raise TypeError( '`disc_scheduler` must be a Pytorch Learning Rate Scheduler.' ) self.generator = generator self.discriminator = discriminator self.gen_optim = gen_optim self.disc_optim = disc_optim self.train_loader = train_loader self.val_loader = val_loader self.loss_funcs = loss_funcs self.gen_scheduler = gen_scheduler self.disc_scheduler = disc_scheduler self.device = args.device self.verbose = args.verbose self.num_epochs = args.num_epochs self.writer = SummaryWriter(str(args.log_path)) self.recon_lambda = torch.tensor(args.recon_lambda, dtype=torch.float32, device=args.device) # This will work best if batch size is 1, as is recommended. I don't know whether this generalizes. self.target_real = torch.tensor(1, dtype=torch.float32, device=args.device) self.target_fake = torch.tensor(0, dtype=torch.float32, device=args.device) # Display interval of 0 means no display of validation images on TensorBoard. if args.max_images <= 0: self.display_interval = 0 else: self.display_interval = int( len(self.val_loader.dataset) // (args.max_images * args.batch_size)) self.generator_checkpoint_manager = CheckpointManager( model=self.generator, optimizer=self.gen_optim, mode='min', save_best_only=args.save_best_only, ckpt_dir=args.ckpt_path / 'Generator', max_to_keep=args.max_to_keep) self.discriminator_checkpoint_manager = CheckpointManager( model=self.discriminator, optimizer=self.disc_optim, mode='min', save_best_only=args.save_best_only, ckpt_dir=args.ckpt_path / 'Discriminator', max_to_keep=args.max_to_keep) # loading from checkpoint if specified. if vars(args).get('gen_prev_model_ckpt'): self.generator_checkpoint_manager.load( load_dir=args.gen_prev_model_ckpt, load_optimizer=False) if vars(args).get('disc_prev_model_ckpt'): self.discriminator_checkpoint_manager.load( load_dir=args.disc_prev_model_ckpt, load_optimizer=False)
def __init__(self, util_e, high_order_utils=[], prior_flag=False, sizes=[], size_flag=False, size_force=False, pairwise_flag=True, unary_flag=True, self_flag=True): super(Atten, self).__init__() self.util_e = util_e self.prior_flag = prior_flag self.n_utils = len(util_e) self.spatial_pool = nn.ModuleDict() self.un_models = nn.ModuleList() self.self_flag = self_flag self.pairwise_flag = pairwise_flag self.unary_flag = unary_flag self.size_flag = size_flag self.size_force = size_force if not self.size_flag: sizes = [None for _ in util_e] self.high_order_utils = high_order_utils self.high_order_set = set([h[0] for h in self.high_order_utils]) for idx, e_dim in enumerate(util_e): self.un_models.append(Unary(e_dim)) if self.size_force: self.spatial_pool[str(idx)] = nn.AdaptiveAvgPool1d(sizes[idx]) self.pp_models = nn.ModuleDict() for ((idx1, e_dim_1), (idx2, e_dim_2)) \ in combinations_with_replacement(enumerate(util_e), 2): if idx1 == idx2: self.pp_models[str(idx1)] = Pairwise(e_dim_1, sizes[idx1]) else: if pairwise_flag: for i, num_utils, connected_list in self.high_order_utils: if i == idx1 and idx2 not in set(connected_list) \ or idx2 == i and idx1 not in set(connected_list): continue self.pp_models[str( (idx1, idx2))] = Pairwise(e_dim_1, sizes[idx1], e_dim_2, sizes[idx2]) self.reduce_potentials = nn.ModuleList() self.num_of_potentials = dict() self.default_num_of_potentials = 0 if self.self_flag: self.default_num_of_potentials += 1 if self.unary_flag: self.default_num_of_potentials += 1 if self.prior_flag: self.default_num_of_potentials += 1 for idx in range(self.n_utils): self.num_of_potentials[idx] = self.default_num_of_potentials ''' ' All other utils ''' if pairwise_flag: for idx, num_utils, connected_utils in high_order_utils: for c_u in connected_utils: self.num_of_potentials[c_u] += num_utils self.num_of_potentials[idx] += 1 for k in self.num_of_potentials.keys(): if k not in self.high_order_set: self.num_of_potentials[k] += (self.n_utils - 1) - len(high_order_utils) for idx in range(self.n_utils): self.reduce_potentials.append( nn.Conv1d(self.num_of_potentials[idx], 1, 1, bias=False))
def __init__(self, models: OrderedDict):
    super(CombinedModel, self).__init__()
    self.semantic_groups = OrderedDict()
    self.model_list = nn.ModuleDict(models)
    self.semantic_groups = {g: models[g].output_semantics for g in models}
def __init__(self, layer, filter_multiplier, block_multiplier, steps, scale, search_space, ppc=None, pc=None, affine=True): super(GumbelCell, self).__init__() # todo add new attribute, affine parameter for bn, making searching phase more stable self.affine = affine # todo add new attribute, for debugging self.layer = layer # change index2scale to index2channel # index -2 and -1 is set by default # index 0, 1, 2, 3, 4 are calculated by int(filter_multiplier * block_multiplier * scale /4) self.index2scale = { 0: 4, 1: 8, 2: 16, 3: 32, } self.index2channel = { 0: int(filter_multiplier * block_multiplier * self.index2scale[0] / 4), 1: int(filter_multiplier * block_multiplier * self.index2scale[1] / 4), 2: int(filter_multiplier * block_multiplier * self.index2scale[2] / 4), 3: int(filter_multiplier * block_multiplier * self.index2scale[3] / 4), } self.steps = steps # nodes within each cell # todo add new attribute self.total_nodes = 2 + self.steps # exclude output node self.filter_multiplier = filter_multiplier self.block_multiplier = block_multiplier self.scale = scale self.search_space = search_space if self.search_space == 'autodeeplab': self.conv_candidates = autodeeplab elif self.search_space == 'proxyless': self.conv_candidates = proxyless elif self.search_space == 'counter': # used to debug self.conv_candidates = counter elif self.search_space == 'my_search_space': self.conv_candidates = my_search_space else: raise ValueError('search space {:} is not support'.format(self.search_space)) #self.conv_candidates = conv_candidates #self.prev_prev_scale = prev_prev_scale #self.prev_scale = prev_scale self.outc = self.index2channel[self.scale] # TODO: do not need prev_prev_scale and prev_scale any more # 1. down same up link for prev_feature # 2. down same up, double down, and double up link for prev_prev_feature # 3. all the link operations are defined in __init__ # 4. justification in forward() pass, and call the related link operation # 5. 
set prev_feature_channels and prev_prev_feature_channels specifically for output of stem0 and stem1 # set types of link operation according to self.scale if self.scale == 0: # only has same and up link for prev_feature # only has same, up, and double up link for prev_prev_feature self.same_link_prev = ConvLayer(self.outc if pc is None else pc, self.outc, 1, 1, False, affine=affine) self.up_link_prev = FactorizedIncrease(int(self.outc*2) if pc is None else pc, self.outc, affine=affine) self.same_link_prev_prev = ConvLayer(self.outc if ppc is None else ppc, self.outc, 1, 1, False, affine=affine) self.up_link_prev_prev = FactorizedIncrease(int(self.outc*2) if ppc is None else ppc, self.outc, affine=affine) self.double_up_link_prev_prev = DoubleFactorizedIncrease(int(self.outc*4) if ppc is None else ppc, self.outc, affine=affine) # has down for prev_prev_feature in layer-0 self.down_link_prev_prev = FactorizedReduce(int(self.outc/2) if ppc is None else ppc, self.outc, affine=affine) elif self.scale == 1: # has down, same, up link for prev_feature # has down, same, up, and double up link for prev_prev_feature self.down_link_prev = FactorizedReduce(int(self.outc/2) if pc is None else pc, self.outc, affine=affine) self.same_link_prev = ConvLayer(self.outc if pc is None else pc, self.outc, 1, 1, False, affine=affine) self.up_link_prev = FactorizedIncrease(int(self.outc*2) if pc is None else pc, self.outc, affine=affine) self.down_link_prev_prev = FactorizedReduce(int(self.outc/2) if ppc is None else ppc, self.outc, affine=affine) self.same_link_prev_prev = ConvLayer(self.outc if ppc is None else ppc, self.outc, 1, 1, False, affine=affine) self.up_link_prev_prev = FactorizedIncrease(int(self.outc*2) if ppc is None else ppc, self.outc, affine=affine) self.double_up_link_prev_prev = DoubleFactorizedIncrease(int(self.outc*4) if ppc is None else ppc, self.outc, affine=affine) # has double down link for prev_prev_feature self.double_down_link_prev_prev = DoubleFactorizedReduce(int(self.outc/4) if ppc is None else ppc, self.outc, affine=affine) elif self.scale == 2: # has down, same, up link for prev_feature # has ddown, same, up link for prev_prev_feature self.down_link_prev = FactorizedReduce(int(self.outc/2) if pc is None else pc, self.outc, affine=affine) self.same_link_prev = ConvLayer(self.outc if pc is None else pc, self.outc, 1, 1, False, affine=affine) self.up_link_prev = FactorizedIncrease(int(self.outc*2) if pc is None else pc, self.outc, affine=affine) self.down_link_prev_prev = FactorizedReduce(int(self.outc/2) if ppc is None else ppc, self.outc, affine=affine) self.double_down_link_prev_prev = DoubleFactorizedReduce(int(self.outc/4) if ppc is None else ppc, self.outc, affine=affine) self.same_link_prev_prev = ConvLayer(self.outc if ppc is None else ppc, self.outc, 1, 1, False, affine=affine) self.up_link_prev_prev = FactorizedIncrease(int(self.outc*2) if ppc is None else ppc, self.outc, affine=affine) elif self.scale == 3: # has down, same link for prev_feature # has ddown, down, and same for prev_prev_feature self.down_link_prev = FactorizedReduce(int(self.outc/2) if pc is None else pc, self.outc, affine=affine) self.same_link_prev = ConvLayer(self.outc if pc is None else pc, self.outc, 1, 1, False, affine=affine) self.double_down_link_prev_prev = DoubleFactorizedReduce(int(self.outc/4) if ppc is None else ppc, self.outc, affine=affine) self.down_link_prev_prev = FactorizedReduce(int(self.outc/2) if ppc is None else ppc, self.outc, affine=affine) self.same_link_prev_prev = 
ConvLayer(self.outc if ppc is None else ppc, self.outc, 1, 1, False, affine=affine) else: raise ValueError('invalid scale value {:}'.format(self.scale)) # todo, new attribute nn.ModuleDict() self.ops = nn.ModuleDict() # i::node_index, j::previous_node_index if self.search_space == 'proxyless': for i in range(2, self.total_nodes): for j in range(i): edge_str = '{:}<-{:}'.format(i, j) #if j == 0 and self.prev_prev_scale is None: # for prev_prev_cell # mobile_inverted_conv = None # shortcut = None #else: mobile_inverted_conv = MixedOp( build_candidate_ops(self.conv_candidates, in_channels=self.outc, out_channels=self.outc, stride=1, ops_order='act_weight_bn', affine=self.affine)) # normal MixedOp, ModuleList with weight shortcut = Identity(self.outc, self.outc) #if mobile_inverted_conv is None and shortcut is None: # inverted_residual_block = None #else: inverted_residual_block = MobileInvertedResidualBlock(mobile_inverted_conv, shortcut) self.ops[edge_str] = inverted_residual_block elif self.search_space == 'autodeeplab' or self.search_space == 'my_search_space': # TODO: have issue in search space of autodeeplab for i in range(2, self.total_nodes): for j in range(i): edge_str = '{:}<-{:}'.format(i, j) #if j == 0 and self.prev_prev_scale is None: # op = None #else: op = MixedOp(build_candidate_ops(self.conv_candidates, in_channels=self.outc, out_channels=self.outc, stride=1, ops_order='act_weight_bn', affine=self.affine)) self.ops[edge_str] = op else: raise ValueError('search space {:} is not supported'.format(self.search_space)) self.finalconv1x1 = ConvLayer(self.steps * self.outc, self.outc, 1, 1, False) self.edge_keys = sorted(list(self.ops.keys())) # 'sorted by {:}<-{:}' self.edge2index = {key:i for i, key in enumerate(self.edge_keys)} # {:}<-{:} : index self.nb_edges = len(self.ops) #self.cell_arch_parameters = nn.Parameter(torch.Tensor(self.nb_edges, self.n_choice)) self.cell_arch_parameters = nn.Parameter(1e-3 * torch.randn(self.nb_edges, self.n_choice))
num_epochs = 1200
batch_size = 100

# load all data
data = load_data()
# normalization(data)

# define train dataset and a data loader
train_data, test_data = split_data(data)

# Normalize data using z-score method
normalization(train_data)
normalization(test_data)

data_tensor = torch.Tensor(train_data.values)

input_hidden_layers = nn.ModuleDict()
hidden_hidden_layers = nn.ModuleDict()
hidden_output_layers = nn.ModuleDict()

net = Net(input_size, num_classes, input_hidden_layers,
          hidden_hidden_layers, hidden_output_layers)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()  # nn.CrossEntropyLoss() computes softmax internally

addNeuron(net)

# train the model by batch
previous_loss = float('inf')
# The training period is 15 + P*N (refer to: "A Cascade network algorithm employing
# Progressive RPROP"), where N is the number of currently installed neurons, and P is
# a parameter set prior to training.
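# A rough, hypothetical sketch of the patience rule described in the comment above
# (a window of 15 + P*N epochs before installing the next neuron). `P`,
# `num_installed_neurons`, and `train_one_epoch` are placeholder names, not part of
# the snippet above.
P = 5                      # assumed per-neuron patience parameter
num_installed_neurons = 1  # addNeuron(net) above installed the first hidden neuron
epochs_since_improvement = 0

for epoch in range(num_epochs):
    loss = train_one_epoch(net, data_tensor, criterion)  # hypothetical training step
    if loss < previous_loss:
        previous_loss = loss
        epochs_since_improvement = 0
    else:
        epochs_since_improvement += 1
    if epochs_since_improvement >= 15 + P * num_installed_neurons:
        addNeuron(net)  # grow the cascade with one more hidden neuron
        num_installed_neurons += 1
        epochs_since_improvement = 0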
def create_task( task_names: Union[str, List[str]], n_arities: Union[int, List[int]], n_features: int, n_classes: Union[int, List[int]], emb_layer: Optional[EmbeddingModule], model: str = "LSTM", mode: str = "MTL", ) -> List[EmmentalTask]: """Create task from relation(s). :param task_names: Relation name(s), If str, only one relation; If List[str], multiple relations. :type task_names: str, List[str] :param n_arities: The arity of each relation. :type n_arities: int, List[int] :param n_features: The multimodal feature set size. :type n_features: int :param n_classes: Number of classes for each task. (Only support classification task now). :type n_classes: int, List[int] :param emb_layer: The embedding layer for LSTM. No need for LogisticRegression model. :type emb_layer: EmbeddingModule :param model: Model name (available models: "LSTM", "LogisticRegression"), defaults to "LSTM". :type model: str :param mode: Learning mode (available modes: "STL", "MTL"), defaults to "MTL". :type mode: str """ if model not in ["LSTM", "LogisticRegression"]: raise ValueError( f"Unrecognized model {model}. Only support {['LSTM', 'LogisticRegression']}" ) if mode not in ["STL", "MTL"]: raise ValueError( f"Unrecognized mode {mode}. Only support {['STL', 'MTL']}") config = get_config()["learning"][model] logger.info(f"{model} model config: {config}") if not isinstance(task_names, list): task_names = [task_names] if not isinstance(n_arities, list): n_arities = [n_arities] if not isinstance(n_classes, list): n_classes = [n_classes] tasks = [] for task_name, n_arity, n_class in zip(task_names, n_arities, n_classes): if mode == "MTL": feature_module_name = "shared_feature" else: feature_module_name = f"{task_name}_feature" if model == "LSTM": module_pool = nn.ModuleDict({ "emb": emb_layer, feature_module_name: SparseLinear(n_features + 1, config["hidden_dim"], bias=config["bias"]), }) for i in range(n_arity): module_pool.update({ f"{task_name}_lstm{i}": RNN( num_classes=0, emb_size=emb_layer.dim, lstm_hidden=config["hidden_dim"], attention=config["attention"], dropout=config["dropout"], bidirectional=config["bidirectional"], ) }) module_pool.update({ f"{task_name}_pred_head": ConcatLinear( [f"{task_name}_lstm{i}" for i in range(n_arity)] + [feature_module_name], config["hidden_dim"] * (2 * n_arity + 1) if config["bidirectional"] else config["hidden_dim"] * (n_arity + 1), n_class, ) }) task_flow = [] task_flow += [{ "name": f"{task_name}_emb{i}", "module": "emb", "inputs": [("_input_", f"m{i}")], } for i in range(n_arity)] task_flow += [{ "name": f"{task_name}_lstm{i}", "module": f"{task_name}_lstm{i}", "inputs": [(f"{task_name}_emb{i}", 0), ("_input_", f"m{i}_mask")], } for i in range(n_arity)] task_flow += [{ "name": feature_module_name, "module": feature_module_name, "inputs": [ ("_input_", "feature_index"), ("_input_", "feature_weight"), ], }] task_flow += [{ "name": f"{task_name}_pred_head", "module": f"{task_name}_pred_head", "inputs": None, }] elif model == "LogisticRegression": module_pool = nn.ModuleDict({ feature_module_name: SparseLinear(n_features + 1, config["hidden_dim"], bias=config["bias"]), f"{task_name}_pred_head": ConcatLinear([feature_module_name], config["hidden_dim"], n_class), }) task_flow = [ { "name": feature_module_name, "module": feature_module_name, "inputs": [ ("_input_", "feature_index"), ("_input_", "feature_weight"), ], }, { "name": f"{task_name}_pred_head", "module": f"{task_name}_pred_head", "inputs": None, }, ] else: raise ValueError(f"Unrecognized model {model}.") tasks.append( 
EmmentalTask( name=task_name, module_pool=module_pool, task_flow=task_flow, loss_func=partial(loss, f"{task_name}_pred_head"), output_func=partial(output, f"{task_name}_pred_head"), scorer=Scorer( metrics=["accuracy", "precision", "recall", "f1"]), )) return tasks
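# A brief usage sketch under the signature above; the relation name and feature-set
# size are placeholders, not values taken from the source.
tasks = create_task(
    task_names="org_employs_person",  # placeholder relation name
    n_arities=2,
    n_features=10000,                 # placeholder multimodal feature set size
    n_classes=2,
    emb_layer=None,                   # not needed for the LogisticRegression model
    model="LogisticRegression",
    mode="STL",
)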
def __init__( self, config_trend=None, config_season=None, config_covar=None, config_regressors=None, config_events=None, config_holidays=None, n_forecasts=1, n_lags=0, num_hidden_layers=0, d_hidden=None, ): """ Args: config_trend (configure.Trend): config_season (configure.Season): config_covar (OrderedDict): config_regressors (OrderedDict): Configs of regressors with mode and index. config_events (OrderedDict): config_holidays (OrderedDict): n_forecasts (int): number of steps to forecast. Aka number of model outputs. n_lags (int): number of previous steps of time series used as input. Aka AR-order. 0 (default): no auto-regression num_hidden_layers (int): number of hidden layers (for AR-Net) 0 (default): no hidden layers, corresponds to classic Auto-Regression d_hidden (int): dimensionality of hidden layers (for AR-Net). ignored if no hidden layers. None (default): sets to n_lags + n_forecasts """ super(TimeNet, self).__init__() # General self.n_forecasts = n_forecasts # Bias self.bias = new_param(dims=[1]) # Metrics live self.metrics_live = {} # Trend self.config_trend = config_trend if self.config_trend.growth in ["linear", "discontinuous"]: self.segmentwise_trend = self.config_trend.trend_reg == 0 self.trend_k0 = new_param(dims=[1]) if self.config_trend.n_changepoints > 0: if self.config_trend.changepoints is None: # create equidistant changepoint times, including zero. linear_t = np.arange(self.config_trend.n_changepoints + 1).astype(float) linear_t = linear_t / (self.config_trend.n_changepoints + 1) self.config_trend.changepoints = self.config_trend.changepoints_range * linear_t else: self.config_trend.changepoints = np.insert( self.config_trend.changepoints, 0, 0.0) self.trend_changepoints_t = torch.tensor( self.config_trend.changepoints, requires_grad=False, dtype=torch.float) self.trend_deltas = new_param(dims=[ self.config_trend.n_changepoints + 1 ]) # including first segment if self.config_trend.growth == "discontinuous": self.trend_m = new_param(dims=[ self.config_trend.n_changepoints + 1 ]) # including first segment # Seasonalities self.config_season = config_season self.season_dims = season_config_to_model_dims(self.config_season) if self.season_dims is not None: if self.config_season.mode == "multiplicative" and self.config_trend is None: log.error("Multiplicative seasonality requires trend.") raise ValueError if self.config_season.mode not in ["additive", "multiplicative"]: log.error( "Seasonality Mode {} not implemented. Defaulting to 'additive'." .format(self.config_season.mode)) self.config_season.mode = "additive" self.season_params = nn.ParameterDict({ name: new_param(dims=[dim]) for name, dim in self.season_dims.items() }) # self.season_params_vec = torch.cat([self.season_params[name] for name in self.season_params.keys()]) # Events self.config_events = config_events self.config_holidays = config_holidays self.events_dims = events_config_to_model_dims(self.config_events, self.config_holidays) if self.events_dims is not None: n_additive_event_params = 0 n_multiplicative_event_params = 0 for event, configs in self.events_dims.items(): if configs["mode"] not in ["additive", "multiplicative"]: log.error( "Event Mode {} not implemented. Defaulting to 'additive'." 
.format(configs["mode"])) self.events_dims[event]["mode"] = "additive" if configs["mode"] == "additive": n_additive_event_params += len(configs["event_indices"]) elif configs["mode"] == "multiplicative": if self.config_trend is None: log.error("Multiplicative events require trend.") raise ValueError n_multiplicative_event_params += len( configs["event_indices"]) self.event_params = nn.ParameterDict({ "additive": new_param(dims=[n_additive_event_params]), "multiplicative": new_param(dims=[n_multiplicative_event_params]), }) else: self.config_events = None self.config_holidays = None # Autoregression self.n_lags = n_lags self.num_hidden_layers = num_hidden_layers self.d_hidden = n_lags + n_forecasts if d_hidden is None else d_hidden if self.n_lags > 0: self.ar_net = nn.ModuleList() d_inputs = self.n_lags for i in range(self.num_hidden_layers): self.ar_net.append( nn.Linear(d_inputs, self.d_hidden, bias=True)) d_inputs = self.d_hidden self.ar_net.append( nn.Linear(d_inputs, self.n_forecasts, bias=False)) for lay in self.ar_net: nn.init.kaiming_normal_(lay.weight, mode="fan_in") # Covariates self.config_covar = config_covar if self.config_covar is not None: assert self.n_lags > 0 self.covar_nets = nn.ModuleDict({}) for covar in self.config_covar.keys(): covar_net = nn.ModuleList() d_inputs = self.n_lags if self.config_covar[covar].as_scalar: d_inputs = 1 for i in range(self.num_hidden_layers): covar_net.append( nn.Linear(d_inputs, self.d_hidden, bias=True)) d_inputs = self.d_hidden covar_net.append( nn.Linear(d_inputs, self.n_forecasts, bias=False)) for lay in covar_net: nn.init.kaiming_normal_(lay.weight, mode="fan_in") self.covar_nets[covar] = covar_net ## Regressors self.config_regressors = config_regressors self.regressors_dims = regressors_config_to_model_dims( config_regressors) if self.regressors_dims is not None: n_additive_regressor_params = 0 n_multiplicative_regressor_params = 0 for name, configs in self.regressors_dims.items(): if configs["mode"] not in ["additive", "multiplicative"]: log.error( "Regressors mode {} not implemented. Defaulting to 'additive'." .format(configs["mode"])) self.regressors_dims[name]["mode"] = "additive" if configs["mode"] == "additive": n_additive_regressor_params += 1 elif configs["mode"] == "multiplicative": if self.config_trend is None: log.error("Multiplicative regressors require trend.") raise ValueError n_multiplicative_regressor_params += 1 self.regressor_params = nn.ParameterDict({ "additive": new_param(dims=[n_additive_regressor_params]), "multiplicative": new_param(dims=[n_multiplicative_regressor_params]), }) else: self.config_regressors = None
def __init__(self, classes: Sequence[str], n_leads: int, config: dict) -> NoReturn: """ finished, checked, Parameters: ----------- classes: sequence of int, name of the classes n_leads: int, number of input leads config: dict, other hyper-parameters, including kernel sizes, etc. ref. the corresponding config file """ super().__init__() self.classes = list(classes) self.n_classes = len(classes) self.__out_channels = len(classes) self.__in_channels = n_leads self.config = ED(deepcopy(config)) if self.__DEBUG__: print( f"configuration of {self.__name__} is as follows\n{dict_to_str(self.config)}" ) __debug_seq_len = 5000 # TODO: an init batch normalization? if self.config.init_batch_norm: self.init_bn = nn.BatchNorm1d( num_features=self.__in_channels, eps=1e-5, # default val momentum=0.1, # default val ) self.init_conv = TripleConv( in_channels=self.__in_channels, out_channels=self.config.init_num_filters, filter_lengths=self.config.init_filter_length, subsample_lengths=1, groups=self.config.groups, dropouts=self.config.init_dropouts, batch_norm=self.config.batch_norm, activation=self.config.activation, kw_activation=self.config.kw_activation, kernel_initializer=self.config.kernel_initializer, kw_initializer=self.config.kw_initializer, ) if self.__DEBUG__: __debug_output_shape = self.init_conv.compute_output_shape( __debug_seq_len) print( f"given seq_len = {__debug_seq_len}, init_conv output shape = {__debug_output_shape}" ) _, _, __debug_seq_len = __debug_output_shape self.down_blocks = nn.ModuleDict() in_channels = self.config.init_num_filters for idx in range(self.config.down_up_block_num - 1): self.down_blocks[f"down_{idx}"] = \ DownTripleConv( down_scale=self.config.down_scales[idx], in_channels=in_channels, out_channels=self.config.down_num_filters[idx], filter_lengths=self.config.down_filter_lengths[idx], groups=self.config.groups, dropouts=self.config.down_dropouts[idx], mode=self.config.down_mode, **(self.config.down_block) ) in_channels = self.config.down_num_filters[idx][-1] if self.__DEBUG__: __debug_output_shape = self.down_blocks[ f"down_{idx}"].compute_output_shape(__debug_seq_len) print( f"given seq_len = {__debug_seq_len}, down_{idx} output shape = {__debug_output_shape}" ) _, _, __debug_seq_len = __debug_output_shape self.bottom_block = DownBranchedDoubleConv( down_scale=self.config.down_scales[-1], in_channels=in_channels, out_channels=self.config.bottom_num_filters, filter_lengths=self.config.bottom_filter_lengths, dilations=self.config.bottom_dilations, groups=self.config.groups, dropouts=self.config.bottom_dropouts, mode=self.config.down_mode, **(self.config.down_block)) if self.__DEBUG__: __debug_output_shape = self.bottom_block.compute_output_shape( __debug_seq_len) print( f"given seq_len = {__debug_seq_len}, bottom_block output shape = {__debug_output_shape}" ) _, _, __debug_seq_len = __debug_output_shape self.up_blocks = nn.ModuleDict() # in_channels = sum([branch[-1] for branch in self.config.bottom_num_filters]) in_channels = self.bottom_block.compute_output_shape(None, None)[1] for idx in range(self.config.down_up_block_num): self.up_blocks[f"up_{idx}"] = \ UpTripleConv( up_scale=self.config.up_scales[idx], in_channels=in_channels, out_channels=self.config.up_num_filters[idx], filter_lengths=self.config.up_conv_filter_lengths[idx], deconv_filter_length=self.config.up_deconv_filter_lengths[idx], groups=self.config.groups, mode=self.config.up_mode, dropouts=self.config.up_dropouts[idx], **(self.config.up_block) ) in_channels = self.config.up_num_filters[idx][-1] if 
self.__DEBUG__: __debug_output_shape = self.up_blocks[ f"up_{idx}"].compute_output_shape(__debug_seq_len) print( f"given seq_len = {__debug_seq_len}, up_{idx} output shape = {__debug_output_shape}" ) _, _, __debug_seq_len = __debug_output_shape self.out_conv = Conv_Bn_Activation( in_channels=self.config.up_num_filters[-1][-1], out_channels=self.__out_channels, kernel_size=self.config.out_filter_length, stride=1, groups=self.config.groups, batch_norm=self.config.batch_norm, activation=self.config.activation, kw_activation=self.config.kw_activation, kernel_initializer=self.config.kernel_initializer, kw_initializer=self.config.kw_initializer, ) if self.__DEBUG__: __debug_output_shape = self.out_conv.compute_output_shape( __debug_seq_len) print( f"given seq_len = {__debug_seq_len}, out_conv output shape = {__debug_output_shape}" ) # for inference # if background counted in `classes`, use softmax # otherwise use sigmoid self.softmax = nn.Softmax(-1) self.sigmoid = nn.Sigmoid()
def __init__(
    self,
    modality_feature_sizes,
    num_classes,
    num_layers=2,
    hidden_size=100,
    num_heads=4,
    max_length=512,
    inner_size=400,
    dropout=0.1,
    nystrom=True,
    num_landmarks=32,
    kernel_size=33,
    prenorm=True,
    scalenorm=True,
    multi_modal_drop="mmdrop",
    p_mmdrop=0.5,
    p_drop_modalities=None,
):
    super(TransformerLateFusionClassifier, self).__init__()
    self.modalities = modality_feature_sizes.keys()
    self.modality_encoders = nn.ModuleDict(
        {
            m: TransformerSequenceEncoder(
                modality_feature_sizes[m],
                feature_normalization=(m == "audio"),
                num_layers=num_layers,
                hidden_size=hidden_size,
                num_heads=num_heads,
                max_length=max_length,
                inner_size=inner_size,
                dropout=dropout,
                nystrom=nystrom,
                num_landmarks=num_landmarks,
                kernel_size=kernel_size,
                prenorm=prenorm,
                scalenorm=scalenorm,
            )
            for m in self.modalities
        }
    )

    self.modality_drop = None
    self.mmdrop = None
    if multi_modal_drop == "mmdrop_hard":
        self.mmdrop = MultimodalDropout(
            p=p_mmdrop, n_modalities=len(self.modalities), p_mod=p_drop_modalities, mode="hard"
        )
    elif multi_modal_drop == "mmdrop_soft":
        self.mmdrop = MultimodalDropout(
            p=p_mmdrop, n_modalities=len(self.modalities), p_mod=p_drop_modalities, mode="soft"
        )
    elif multi_modal_drop == "dropout":
        self.modality_drop = nn.Dropout(p_mmdrop)
    elif multi_modal_drop == "both":
        self.mmdrop = MultimodalDropout(
            p=p_mmdrop, n_modalities=len(self.modalities), p_mod=p_drop_modalities, mode="hard"
        )
        self.modality_drop = nn.Dropout(p_mmdrop)
    elif multi_modal_drop == "none":
        pass
    else:
        raise ValueError(
            f"Unsupported multi_modal_drop value: {multi_modal_drop}. Please check your config file."
        )

    self.out_size = sum(e.out_size for e in self.modality_encoders.values())
    self.clf = nn.Sequential(
        nn.Linear(self.out_size, self.out_size),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(self.out_size, num_classes),
    )
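# Hedged usage sketch (not from the original repo): one way the constructor above could
# be called; modality names, feature sizes, and class count are hypothetical.
# clf = TransformerLateFusionClassifier(
#     modality_feature_sizes={"audio": 74, "text": 300},
#     num_classes=7,
#     multi_modal_drop="mmdrop_hard",
#     p_mmdrop=0.33,
#     p_drop_modalities=[0.5, 0.5],  # one probability per modality (assumed semantics)
# )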
def __init__(self, params, pembeds, sizes=None, maps=None, lab2ign=None):
    super(BaseNet, self).__init__()

    self.edg = ['MM', 'SS', 'ME', 'MS', 'ES', 'EE']
    self.dims = {}
    for k in self.edg:
        self.dims[k] = 4 * params['lstm_dim']

    self.device = torch.device("cuda:{}".format(params['gpu']) if params['gpu'] != -1 else "cpu")

    self.encoder = Encoder(input_size=params['word_dim'],
                           rnn_size=params['out_dim'],
                           num_layers=1,
                           bidirectional=True,
                           dropout=0.0)

    self.word_embed = EmbedLayer(num_embeddings=sizes['word_size'],
                                 embedding_dim=params['word_dim'],
                                 dropout=params['drop_i'],
                                 ignore=None,
                                 freeze=params['freeze_words'],
                                 pretrained=pembeds,
                                 mapping=maps['word2idx'])

    if params['dist']:
        self.dims['MM'] += params['dist_dim']
        self.dims['SS'] += params['dist_dim']
        self.dist_embed = EmbedLayer(num_embeddings=sizes['dist_size'] + 1,
                                     embedding_dim=params['dist_dim'],
                                     dropout=0.0,
                                     ignore=sizes['dist_size'],
                                     freeze=False,
                                     pretrained=None,
                                     mapping=None)

    if params['context']:
        self.dims['MM'] += (2 * params['lstm_dim'])
        self.attention = Dot_Attention(input_size=2 * params['lstm_dim'],
                                       device=self.device,
                                       scale=False)

    if params['types']:
        for k in self.edg:
            self.dims[k] += (2 * params['type_dim'])
        self.type_embed = EmbedLayer(num_embeddings=3,
                                     embedding_dim=params['type_dim'],
                                     dropout=0.0,
                                     freeze=False,
                                     pretrained=None,
                                     mapping=None)

    self.reduce = nn.ModuleDict()
    for k in self.edg:
        if k != 'EE':
            self.reduce.update({k: nn.Linear(self.dims[k], params['out_dim'], bias=False)})
        elif (('EE' in params['edges']) or ('FULL' in params['edges'])) and (k == 'EE'):
            self.ee = True
            self.reduce.update({k: nn.Linear(self.dims[k], params['out_dim'], bias=False)})
        else:
            self.ee = False

    if params['walks_iter'] and params['walks_iter'] > 0:
        self.walk = WalkLayer(input_size=params['out_dim'],
                              iters=params['walks_iter'],
                              beta=params['beta'],
                              device=self.device)

    self.classifier = Classifier(in_size=params['out_dim'],
                                 out_size=sizes['rel_size'],
                                 dropout=params['drop_o'])

    self.loss = nn.CrossEntropyLoss()

    # hyper-parameters for tuning
    self.beta = params['beta']
    self.dist_dim = params['dist_dim']
    self.type_dim = params['type_dim']
    self.drop_i = params['drop_i']
    self.drop_o = params['drop_o']
    self.gradc = params['gc']
    self.learn = params['lr']
    self.reg = params['reg']
    self.out_dim = params['out_dim']

    # other parameters
    self.mappings = {'word': maps['word2idx'], 'type': maps['type2idx'], 'dist': maps['dist2idx']}
    self.inv_mappings = {'word': maps['idx2word'], 'type': maps['idx2type'], 'dist': maps['idx2dist']}
    self.word_dim = params['word_dim']
    self.lstm_dim = params['lstm_dim']
    self.walks_iter = params['walks_iter']
    self.rel_size = sizes['rel_size']
    self.types = params['types']
    self.ignore_label = lab2ign
    self.context = params['context']
    self.dist = params['dist']
def __init__(self,
             model_cfg,
             block='residual',
             input_size=(1, 256, 256),
             classes=None,
             last_act="linear",
             conv_transpose=False,
             bn=True,
             architecture=None,
             big_drop=0.,
             small_drop=0.,
             sddrop=0.,
             se_ratio=0.0,
             input_format=None,
             output_format=None,
             multi_scale=False,
             multi_input=False):
    # Parse the network's architecture
    if architecture is None:
        architecture = {
            "first": 32,
            "enc": {
                "width": [16, 32, 48, 96],
                "repeat": [2, 3, 3, 4]
            },
            "dec": {
                "width": [48, 32, 32],
                "repeat": [2, 2, 1]
            }
        }
    arch = architecture
    if "dilation" not in arch["enc"]:
        arch["enc"]["dilation"] = [1] * len(arch["enc"]["repeat"])
    assert len({"first", "enc", "dec"} - {*list(arch.keys())}) == 0, "Missing keys: Need enc, dec, first"
    assert len({"repeat", "width"} - {*list(arch["enc"].keys())}) == 0, "Missing keys enc: Need width, repeat"
    assert len({"repeat", "width"} - {*list(arch["dec"].keys())}) == 0, "Missing keys dec: Need width, repeat"
    assert len(arch["enc"]["repeat"]) == len(arch["enc"]["width"]), "Mismatched dimensions"
    assert len(arch["enc"]["repeat"]) == len(arch["enc"]["dilation"]), "Mismatched dimensions"
    assert len(arch["dec"]["repeat"]) == len(arch["dec"]["width"]), "Mismatched dimensions"
    self.arch = arch
    arch["width"] = arch["enc"]["width"] + arch["dec"]["width"]
    arch_enc_len = len(arch["enc"]["width"])
    arch_dec_len = len(arch["dec"]["width"])

    # Construct Super params (input/output-format, tops etc.)
    super().__init__(model_cfg=model_cfg,
                     classes=classes,
                     last_act=last_act,
                     output_format=output_format,
                     input_format=input_format,
                     input_size=input_size,
                     repeat_outputs=arch_dec_len + 1 if multi_scale else None)

    self.classes = classes
    self.n_classes = len(classes) + 1
    self.conv_transpose = conv_transpose
    self.multi_scale = multi_scale
    self.multi_input = multi_input

    # Generate Basic building block & Bigger block
    CBA = self.CBA
    all_blocks = get_all_blocks()
    if type(block) is not list:
        block = [block, block]
    blocks = {}
    for bl, name in zip(block, ["enc", "dec"]):
        if bl not in all_blocks:
            raise ValueError("Block " + bl + " is not a valid block option")
        blocks[name] = all_blocks[bl]

    # Encoder
    bw = first_bw = arch["first"]
    self.input_process = {}
    for key, in_size in zip(self.input_format, self.input_format_sizes):
        self.input_process[key] = CBA(in_size, bw, 3, bn=bn, act=True)
    self.input_process = nn.ModuleDict(self.input_process)

    def get_encoder(wfuse=False):
        prev_bw = arch["first"]
        skips_bw = []
        encoder = []
        fusions = []
        for i, (repeat_block, dilation) in enumerate(
                zip(self.arch["enc"]["repeat"], self.arch["enc"]["dilation"])):
            is_last = (i + 1 == arch_enc_len)
            if wfuse:
                fusions.append(
                    FusionModule(
                        model_cfg,
                        in_width=prev_bw,
                        Block=CBA,
                        n_inputs=len(self.input_format) + (0 if i == 0 else 1),  # Own new branch
                        multi_inputs=None if not multi_input or i == 0 else [first_bw] * len(self.input_format),
                    ))
            new_bw = arch["width"][i]
            for j in range(repeat_block):
                pool = "max" if j + 1 == repeat_block and not is_last else None
                drop = small_drop if (not is_last) or j + 1 < repeat_block else big_drop
                encoder.append(
                    ConvBlock(model_cfg,
                              blocks["enc"],
                              prev_bw,
                              new_bw,
                              3,
                              bn=bn,
                              pool=pool,
                              conv_transpose=self.conv_transpose,
                              drop=(drop, sddrop),
                              se_ratio=se_ratio,
                              dilation=dilation,
                              first=(i == 0)))
                prev_bw = new_bw
            skips_bw.append(prev_bw)
        if wfuse:
            fusions.append(
                nn.Sequential(
                    FusionModule(
                        model_cfg,
                        in_width=prev_bw,
                        n_inputs=len(self.input_format) + 1,
                    ),
                    UpSampling(model_cfg, in_width=prev_bw, width=arch["width"][i + 1])))
            return encoder, skips_bw, fusions
        else:
            return encoder

    self.encoders = []
    for _ in self.input_format:  # all the basic encoders
        self.encoders.append(nn.ModuleList(get_encoder()))
    f_enc, skips_bw, fusions = get_encoder(wfuse=True)  # the fusion encoder
    self.fusions = nn.ModuleList(fusions)
    self.encoders.append(nn.ModuleList(f_enc))
    self.encoders = nn.ModuleList(self.encoders)

    # Decoders (Classif, Pif, Paf...)
    skips_bw.reverse()  # Reverse for easier indexing

    def get_decoder(prev_bw):
        decoder = []
        tops_prev_bw = []
        tops_upsample = []
        for i, repeat_block in enumerate(self.arch["dec"]["repeat"]):
            if self.multi_scale:
                tops_prev_bw.append(prev_bw)
                tops_upsample.append(2**(arch_dec_len - i - 1))
            is_last = (i + 1 == arch_dec_len)
            new_bw = arch["width"][arch_enc_len + i]
            for j in range(repeat_block):
                pool = "up" if not is_last and j + 1 == repeat_block else None
                has_skip = j == 0
                concat_width = skips_bw[i + 1] if has_skip else None
                elems = [
                    ConvBlock(model_cfg,
                              blocks["dec"],
                              prev_bw,
                              new_bw,
                              3,
                              bn=bn,
                              concatenate=has_skip,
                              concat_width=concat_width,
                              conv_transpose=self.conv_transpose,
                              drop=(small_drop, sddrop),
                              se_ratio=se_ratio,
                              last_only=True)
                ]
                prev_bw = new_bw
                if pool is not None:
                    new_bw = arch["width"][arch_enc_len + i + 1]  # search for next one
                    elems.append(UpSampling(model_cfg, in_width=prev_bw, width=new_bw))
                    prev_bw = new_bw
                decoder.append(nn.Sequential(*elems))
        tops_prev_bw.append(prev_bw)
        tops_upsample.append(1)
        return decoder, tops_prev_bw, tops_upsample

    enc_prev_bw = arch["width"][arch_enc_len]
    decoder_types = {
        "mask": lambda: get_decoder(enc_prev_bw),
        "keypoints": lambda: get_decoder(enc_prev_bw),
        # NOTE: this branch returns two values while the loop below unpacks three
        "class": lambda: (
            [SubIdentity()] * (np.sum(arch["dec"]["repeat"])),  # FIXME: not really efficient
            enc_prev_bw)
    }

    self.decoders = []
    decoder_tops_prev_bw = []
    decoder_tops_upsample = []
    for out_class in self.inference_output_format:
        decoder, bw_dec, up_dec = decoder_types[out_class.name]()
        self.decoders.append(nn.ModuleList(decoder))
        decoder_tops_prev_bw += bw_dec
        decoder_tops_upsample += up_dec
    self.decoders = nn.ModuleList(self.decoders)

    # Tops
    self.tops = self.make_tops(decoder_tops_prev_bw, decoder_tops_upsample)
def __init__(self, projection_names, d_inp=512):
    super(TokenMultiProjectionEncoder, self).__init__()
    self.projections = nn.ModuleDict({
        name: TokenProjectionEncoder(d_inp=d_inp)
        for name in projection_names
    })
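# Hedged usage sketch (assumption): one projection head per name, all over the same
# d_inp-sized token states; the names and tensor shapes below are illustrative only.
# enc = TokenMultiProjectionEncoder(projection_names=["pos", "ner"], d_inp=512)
# per_task = {name: head(token_states) for name, head in enc.projections.items()}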
def __init__(self, in_ch, do_task_list, fc=1, fc_nc=64, n=1):
    super(FastNeuralStyleTransfer, self).__init__()
    self.nc_list = [
        32 * n, 64 * n, 128 * n, 128 * n, 128 * n, 128 * n, 128 * n, 128 * n,
        128 * n, 128 * n, 128 * n, 128 * n, 128 * n, 64 * n, 32 * n
    ]
    self.do_task_list = do_task_list
    task_num = len(do_task_list)

    # self.film_generator = film_generator(sum(self.nc_list), task_num - 1, fc, fc_nc)
    self.film_generator = film_generator(sum(self.nc_list), task_num, fc, fc_nc)

    # Initial convolution layers
    self.encoder = nn.ModuleDict({
        'conv1': ConvLayer(in_ch, 32 * n, kernel_size=9, stride=1),
        'film1': film(32 * n, task_num),
        'conv2': ConvLayer(32 * n, 64 * n, kernel_size=3, stride=2),
        'film2': film(64 * n, task_num),
        'conv3': ConvLayer(64 * n, 128 * n, kernel_size=3, stride=2),
        'film3': film(128 * n, task_num),
    })

    # Residual layers
    self.res = nn.ModuleDict({
        'res1': ResidualBlock(128 * n, task_num),
        'res2': ResidualBlock(128 * n, task_num),
        'res3': ResidualBlock(128 * n, task_num),
        'res4': ResidualBlock(128 * n, task_num),
        'res5': ResidualBlock(128 * n, task_num),
    })

    # Upsampling Layers
    self.decoder = nn.ModuleDict({
        'deconv1': UpsampleConvLayer(128 * n, 64 * n, kernel_size=3, stride=1, upsample=2),
        'film4': film(64 * n, task_num),
        'deconv2': UpsampleConvLayer(64 * n, 32 * n, kernel_size=3, stride=1, upsample=2),
        'film5': film(32 * n, task_num),
    })

    self.lastconv_dic = nn.ModuleDict({})
    for task in self.do_task_list:
        if task == 'autoencoder':
            self.lastconv_dic[task] = nn.Sequential(ConvLayer(32 * n, 3, kernel_size=9, stride=1), nn.Tanh())
        elif task == 'segment_semantic':
            self.lastconv_dic[task] = nn.Sequential(ConvLayer(32 * n, 17, kernel_size=9, stride=1))
        elif task == 'edge_texture':
            self.lastconv_dic[task] = nn.Sequential(ConvLayer(32 * n, 1, kernel_size=9, stride=1), nn.Tanh())
        elif task == 'edge_occlusion':
            self.lastconv_dic[task] = nn.Sequential(ConvLayer(32 * n, 1, kernel_size=9, stride=1), nn.Tanh())
        elif task == 'normal':
            self.lastconv_dic[task] = nn.Sequential(ConvLayer(32 * n, 3, kernel_size=9, stride=1), nn.Tanh())
        elif task == 'principal_curvature':
            self.lastconv_dic[task] = nn.Sequential(ConvLayer(32 * n, 3, kernel_size=9, stride=1), nn.Tanh())
        elif task == 'keypoints2d':
            self.lastconv_dic[task] = nn.Sequential(ConvLayer(32 * n, 1, kernel_size=9, stride=1), nn.Tanh())
        elif task == 'keypoints3d':
            self.lastconv_dic[task] = nn.Sequential(ConvLayer(32 * n, 1, kernel_size=9, stride=1), nn.Tanh())
        elif task == 'depth_zbuffer':
            self.lastconv_dic[task] = nn.Sequential(ConvLayer(32 * n, 1, kernel_size=9, stride=1), nn.Tanh())

    # Non-linearities
    self.relu = torch.nn.ReLU()

    self._initialize_weights()
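# Hedged sketch (assumption, not the original forward): the task-specific head is
# looked up by task name from `lastconv_dic`, e.g.
# out = model.lastconv_dic['normal'](decoder_features)  # 3-channel output, tanh-bounded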
def __init__(
    self,
    device,
    preproc,
    word_emb_size,
    num_latent_relations,
    hidden_size=300,
    recurrent_size=256,
    discrete_relation=True,
    norm_relation=True,
    symmetric_relation=False,
    combine_latent_relations=False,
    score_type="bilinear",
    learnable_embeddings=False,
    question_encoder=("shared-en-emb", ),
    column_encoder=("shared-en-emb", ),
    table_encoder=("shared-en-emb", ),
):
    super().__init__()
    self.preproc = preproc
    self.vocab = preproc.vocab
    self.word_emb_size = word_emb_size
    self._device = device
    self.hidden_size = hidden_size
    self.discrete_relation = discrete_relation
    self.norm_relation = norm_relation
    self.num_latent_relations = num_latent_relations
    self.relations2id = preproc.relations2id
    self.recurrent_size = recurrent_size
    self.dropout = 0.0

    score_funcs = {
        "bilinear": lambda: energys.Bilinear(hidden_size, num_latent_relations, include_id=True),
        "mlp": lambda: energys.MLP(hidden_size, num_latent_relations),
    }

    # build modules
    if learnable_embeddings:
        self.en_learnable_words = self.vocab
    else:
        self.en_learnable_words = None

    shared_modules = {
        "shared-en-emb": embedders.LookupEmbeddings(
            self._device,
            self.vocab,
            self.preproc.word_emb,
            self.word_emb_size,
            learnable_words=self.en_learnable_words,
        ),
    }

    if self.preproc.use_ch_vocab:
        self.ch_vocab = preproc.ch_vocab
        if learnable_embeddings:
            self.ch_learnable_words = self.ch_vocab
        else:
            self.ch_learnable_words = None
        shared_modules["shared-ch-emb"] = embedders.LookupEmbeddings(
            self._device,
            self.ch_vocab,
            self.preproc.ch_word_emb,
            self.preproc.ch_word_emb.dim,
            learnable_words=self.ch_learnable_words,
        )
        shared_modules["ch-bilstm"] = lstm.BiLSTM(
            input_size=self.preproc.ch_word_emb.dim,
            output_size=self.recurrent_size,
            dropout=self.dropout,
            use_native=False,
            summarize=False,
        )
        shared_modules["ch-bilstm-native"] = lstm.BiLSTM(
            input_size=self.preproc.ch_word_emb.dim,
            output_size=self.recurrent_size,
            dropout=self.dropout,
            use_native=True,
            summarize=False,
        )

    self.question_encoder = self._build_modules(question_encoder, shared_modules=shared_modules)
    self.column_encoder = self._build_modules(column_encoder, shared_modules=shared_modules)
    self.table_encoder = self._build_modules(table_encoder, shared_modules=shared_modules)

    self.combine_latent_relations = combine_latent_relations
    if combine_latent_relations:
        self.string_link = StringLinking(device, preproc)

    self.symmetric_relation = symmetric_relation
    assert self.symmetric_relation
    if self.symmetric_relation:
        relations = ("qc", "qt")
    else:
        relations = ("qc", "cq", "tq", "qt")
    self.relation_score_dic = nn.ModuleDict({k: score_funcs[score_type]() for k in relations})

    if discrete_relation:
        self.temperature = 1  # for gumbel

    if not norm_relation:  # then norm q/col/tab
        self.null_q_token = nn.Parameter(torch.zeros([1, hidden_size]))
        self.null_c_token = nn.Parameter(torch.zeros([1, hidden_size]))
        self.null_t_token = nn.Parameter(torch.zeros([1, hidden_size]))
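# Hedged sketch (assumption, not the original code): with discrete_relation=True the
# per-pair relation scores are presumably sampled with Gumbel-softmax using
# self.temperature; torch.nn.functional.gumbel_softmax is the standard call.
# import torch.nn.functional as F
# rel_probs = F.gumbel_softmax(relation_logits, tau=self.temperature, hard=False, dim=-1)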
def __init__(self, num_classes, loss, block, layers, last_stride=2, fc_dims=None,
             attribute_list=None, attr_dims=None, **kwargs):
    self.inplanes = 64
    super(ResNetMid, self).__init__()
    self.loss = loss
    self.feature_dim = 512 * block.expansion
    print("Attribute_list = ", attribute_list)
    print("Attribute_dims = ", attr_dims)

    # backbone network
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=last_stride)

    self.global_avgpool = nn.AdaptiveAvgPool2d(1)
    assert fc_dims is not None
    # Remove dropout if it doesn't improve
    self.fc_fusion = self._construct_fc_layer(fc_dims, 512 * block.expansion * 2)
    self.feature_dim += 512 * block.expansion
    self.attr_dims = attr_dims
    # self.classifier = nn.Linear(self.feature_dim, num_classes)

    # modify the final layer to contain classifiers for person id and attributes;
    # `attribute_list` is expected to be a dict with the attribute name as key and
    # the array of possible values as value
    # self.classifiers = nn.ModuleList()
    # self.attributes = [('id', num_classes)]
    # self.classifiers.append(nn.Linear(self.feature_dim, num_classes))
    self.classifiers = nn.ModuleDict()
    self.classifiers["id"] = nn.Linear(self.feature_dim, num_classes)
    if attribute_list is not None:
        for attribute_name, choices in attribute_list.items():
            if self.attr_dims is not None:
                self.classifiers[attribute_name] = nn.Sequential(
                    nn.Linear(self.feature_dim, self.attr_dims),
                    nn.ReLU(),
                    nn.Linear(self.attr_dims, len(choices)),
                )
            else:
                self.classifiers[attribute_name] = nn.Linear(self.feature_dim, len(choices))

    # for name, length in self.attributes:
    #     setattr(self, 'attr_' + name, nn.Linear(self.feature_dim, length))
    # self.test_layer = nn.Linear(self.feature_dim, 10)
    self._init_params()
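# Hedged sketch (not the original forward): applying the ModuleDict of heads built
# above to a fused feature vector `v` of size self.feature_dim.
# outputs = {name: head(v) for name, head in self.classifiers.items()}
# # outputs["id"] -> person-id logits; outputs[<attribute_name>] -> attribute logits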
def __init__(self, cfg: DictConfig, trainer: Trainer = None):
    # Get global rank and total number of GPU workers for IterableDataset partitioning, if applicable
    # global_rank and local_rank are set by LightningModule in Lightning 1.2.0
    self.world_size = 1
    if trainer is not None:
        self.world_size = trainer.world_size

    super().__init__(cfg=cfg, trainer=trainer)

    self.preprocessor = SpeechEncDecSelfSupervisedModel.from_config_dict(self._cfg.preprocessor)
    self.encoder = SpeechEncDecSelfSupervisedModel.from_config_dict(self._cfg.encoder)

    self.decoder_losses = None
    if "loss_list" in self._cfg:
        self.decoder_losses = {}
        self.loss_alphas = {}
        self.start_step = {}
        self.output_from_layer = {}
        self.transpose_encoded = {}
        self.targets_from_loss = {}
        # need to be separate for moduledict
        for decoder_loss_name, decoder_loss_cfg in self._cfg.loss_list.items():
            new_decoder_loss = {
                'decoder': SpeechEncDecSelfSupervisedModel.from_config_dict(decoder_loss_cfg.decoder),
                'loss': SpeechEncDecSelfSupervisedModel.from_config_dict(decoder_loss_cfg.loss),
            }
            new_decoder_loss = nn.ModuleDict(new_decoder_loss)
            self.decoder_losses[decoder_loss_name] = new_decoder_loss
            self.loss_alphas[decoder_loss_name] = decoder_loss_cfg.get("loss_alpha", 1.0)
            self.output_from_layer[decoder_loss_name] = decoder_loss_cfg.get("output_from_layer", None)
            self.targets_from_loss[decoder_loss_name] = decoder_loss_cfg.get("targets_from_loss", None)
            self.start_step[decoder_loss_name] = decoder_loss_cfg.get("start_step", 0)
            self.transpose_encoded[decoder_loss_name] = decoder_loss_cfg.get("transpose_encoded", False)

            if self.output_from_layer[decoder_loss_name] is not None:
                self.set_access_enabled(access_enabled=True)

        self.decoder_losses = nn.ModuleDict(self.decoder_losses)
    else:
        self.decoder_ssl = SpeechEncDecSelfSupervisedModel.from_config_dict(self._cfg.decoder)
        self.loss = SpeechEncDecSelfSupervisedModel.from_config_dict(self._cfg.loss)

    self.spec_augmentation = SpeechEncDecSelfSupervisedModel.from_config_dict(self._cfg.spec_augment)

    # dropout for features/spectrograms (applied before masking)
    self.dropout_features = (
        torch.nn.Dropout(self._cfg.dropout_features) if "dropout_features" in self._cfg else None
    )

    # dropout for targets (applied before quantization)
    self.dropout_features_q = (
        torch.nn.Dropout(self._cfg.dropout_features_q) if "dropout_features_q" in self._cfg else None
    )

    # Feature penalty for preprocessor encodings (for Wav2Vec training)
    if "feature_penalty" in self._cfg:
        self.feat_pen, self.pen_factor = 0.0, self._cfg.feature_penalty
    else:
        self.feat_pen, self.pen_factor = None, None

    if "access" in self._cfg:
        set_access_cfg(self._cfg.access)

    self.apply_masking = True
def change_model_spec(self, model_spec, initial=False, verbose=False):
    # Setup a graph structure to simplify our life
    dag = nx.from_numpy_matrix(model_spec.matrix, create_using=nx.DiGraph())

    node_labels = {}
    for i, op in enumerate(model_spec.ops):
        if op == "input" or op == "output":
            node_labels[i] = op
        else:
            node_labels[i] = "vertex_%d" % i
    dag = nx.relabel_nodes(dag, node_labels)

    # Resolve dependencies in graph
    self.execution_order = self._get_execution_order(dag)

    # Setup output_sizes for operations and assign vertex types
    out_shapes_list = compute_vertex_channels(self.input_channels, self.output_channels, model_spec.matrix)
    if verbose:
        logging.info('vertex channels %s', str(out_shapes_list))

    if initial:
        # generate the maximum possible channels.
        out_shapes_list = [self.input_channels, ] + [self.output_channels, ] * (len(out_shapes_list) - 1)

    out_shapes = {}
    vertex_types = {}
    for t, (shape, op) in enumerate(zip(out_shapes_list, model_spec.ops)):
        out_shapes[node_labels[t]] = shape
        vertex_types[node_labels[t]] = op

    self.dag = dag
    # print('node labels', node_labels)
    # print('out_shapes_list', out_shapes_list)
    # print('out_shapes', out_shapes)
    # print('vertex_types', vertex_types)
    # return node_labels, out_shapes, vertex_types, out_shapes_list

    # Setup the operations
    if initial:
        self.vertex_ops = nn.ModuleDict()
    for output_node, input_nodes in self.execution_order.items():
        if output_node == "output":
            continue
        # Setup all input shapes
        in_shapes = [out_shapes[node] for node in input_nodes]
        # Check if any of the inputs to the vertex comes from the input to the module
        is_input = [node == "input" for node in input_nodes]
        if initial:
            # Setup the operation
            self.vertex_ops[output_node] = self.vertex_cls(in_shapes, out_shapes[output_node],
                                                           vertex_types[output_node], is_input, self.args)
        else:
            # get the input_nodes order, by [input, vertex_i]
            input_nodes_id = [0 if x == 'input' else int(x.split('vertex_')[1]) for x in input_nodes]
            self.vertex_ops[output_node].change_vertex_type(in_shapes, out_shapes[output_node],
                                                            vertex_types[output_node], input_nodes_id)

    # Handle skip connections to output
    self.has_skip = self.dag.has_edge("input", "output")
    if self.has_skip:
        # if len(self.execution_order['output']) > 1:
        self.execution_order["output"].remove("input")
        if len(self.execution_order['output']) == 0:
            del self.execution_order['output']
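# Hedged sketch of what `_get_execution_order` is assumed to provide (the real helper
# is not shown here): a mapping from each node to its predecessor nodes, visited in
# topological order so every vertex is computed after its inputs.
import networkx as nx

def _execution_order_sketch(dag: nx.DiGraph):
    order = {}
    for node in nx.topological_sort(dag):
        preds = list(dag.predecessors(node))
        if preds:  # the "input" node has no predecessors and needs no op
            order[node] = preds
    return order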