Example #1
    def __init__(self, config=None):
        super().__init__()

        config = config if config is not None else self._get_default_config()
        self.config = config
        self.network_height = len(config['num_filters'])

        # downsampling blocks
        self.conv_down = nn.ModuleDict()
        self.fgru_down = nn.ModuleDict()
        self.pool = nn.ModuleDict()
        for i in range(self.network_height - 1):
            in_c = config['in_channels'] if i == 0 else config['num_filters'][i - 1]
            blk = self._conv_block(in_c,
                                   config['num_filters'][i],
                                   kernel_size=config['conv_kernel_size'][i],
                                   blocksize=config['conv_blocksize'][i],
                                   normtype=config['conv_normtype'],
                                   dropout_p=config['conv_dropout_p'],
                                   name='')
            self.conv_down[str(i)] = blk
            fgru_cell = fConvGRUCell(
                config['num_filters'][i], config['fgru_hidden_size'][i],
                config['fgru_kernel_size'][i], config['fgru_timesteps'],
                config['fgru_normtype'], config['fgru_channel_sym'],
                config['fgru_attention_args'])
            self.fgru_down[str(i)] = fgru_cell
            self.pool[str(i)] = nn.MaxPool2d(kernel_size=2, stride=2)

        # bottleneck
        self.conv_bottleneck = self._conv_block(
            config['num_filters'][-2],
            config['num_filters'][-1],
            kernel_size=config['conv_kernel_size'][-1],
            blocksize=config['conv_blocksize'][-1],
            normtype=config['conv_normtype'],
            dropout_p=config['conv_dropout_p'])
        self.fgru_bottleneck = fConvGRUCell(
            config['num_filters'][-1], config['fgru_hidden_size'][-1],
            config['fgru_kernel_size'][self.network_height - 1],
            config['fgru_timesteps'], config['fgru_normtype'],
            config['fgru_channel_sym'], config['fgru_attention_args'])

        # upsampling blocks
        self.upsample = nn.ModuleDict()
        self.ups_conv = nn.ModuleDict()
        self.conv_up = nn.ModuleDict()
        self.fgru_up = nn.ModuleDict()
        for i in range(self.network_height - 2, -1, -1):  # from the 2nd-to-deepest level up to the first level

            # upsampling operations
            if config['upsample_mode'] == 'transpose':
                if config['upsample_all2all']:
                    raise NotImplementedError(
                        'Transpose mode does not support all-to-all')
                self.upsample[str(i)] = nn.ConvTranspose2d(
                    config['num_filters'][i + 1],
                    config['num_filters'][i],
                    kernel_size=2,
                    stride=2)
            else:
                # ups_out_dims = tuple(
                #     np.array(config['in_dims'][:2]) // (2 ** i))
                if config['upsample_all2all']:  # will concat fgru act from all layers below
                    ups_in_channels = [config['num_filters'][i + 1]]
                    for j in range(i + 1, self.network_height):
                        ups = nn.Upsample(scale_factor=2**(j - i),
                                          mode=config['upsample_mode'],
                                          align_corners=False)
                        self.upsample["{}-{}".format(j, i)] = ups
                        ups_in_channels += [config['num_filters'][j]]
                    ups_in_channels = sum(ups_in_channels)
                else:
                    ups = nn.Upsample(scale_factor=2,
                                      mode=config['upsample_mode'],
                                      align_corners=False)
                    self.upsample["{}-{}".format(i + 1, i)] = ups
                    ups_in_channels = config['num_filters'][i + 1]
                self.ups_conv[str(i)] = nn.Conv2d(ups_in_channels,
                                                  config['num_filters'][i],
                                                  kernel_size=1)

            # conv block
            blk = self._conv_block(
                config['num_filters'][i] * 2,  # concat'd skip activity
                config['num_filters'][i],
                kernel_size=config['conv_kernel_size'][i],
                blocksize=config['conv_blocksize'][i],
                normtype=config['conv_normtype'],
                dropout_p=config['conv_dropout_p'],
                name='')
            self.conv_up[str(i)] = blk

            # fgru
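            # index (self.network_height * 2 - 2) - i assumes fgru_kernel_size holds
            # 2 * network_height - 1 entries (down path, bottleneck, then up path),
            # so each up-path level reuses the entry that mirrors its down-path level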
            fgru_cell = fConvGRUCell(
                config['num_filters'][i], config['fgru_hidden_size'][i],
                config['fgru_kernel_size'][(self.network_height * 2 - 2) - i],
                config['fgru_timesteps'], config['fgru_normtype'],
                config['fgru_channel_sym'], config['fgru_attention_args'])
            self.fgru_up[str(i)] = fgru_cell
Example #2
File: transformer.py  Project: j-luo93/XLM
    def __init__(self, params, dico, is_encoder, with_output):
        """
        Transformer model (encoder or decoder).
        """
        super().__init__()

        # encoder / decoder, output layer
        self.is_encoder = is_encoder
        self.is_decoder = not is_encoder
        self.with_output = with_output

        # dictionary / languages
        self.n_langs = params.n_langs
        self.n_words = params.n_words
        self.eos_index = params.eos_index
        self.pad_index = params.pad_index
        self.dico = dico
        self.id2lang = params.id2lang
        self.lang2id = params.lang2id
        self.use_lang_emb = getattr(params, 'use_lang_emb', True)
        assert len(self.dico) == self.n_words
        assert len(self.id2lang) == len(self.lang2id) == self.n_langs

        # model parameters
        self.dim = params.emb_dim       # 512 by default
        self.hidden_dim = self.dim * 4  # 2048 by default
        self.n_heads = params.n_heads   # 8 by default
        self.n_layers = params.n_layers
        self.dropout = params.dropout
        self.attention_dropout = params.attention_dropout
        assert self.dim % self.n_heads == 0, 'transformer dim must be a multiple of n_heads'

        # embeddings
        self.position_embeddings = Embedding(N_MAX_POSITIONS, self.dim)
        if params.sinusoidal_embeddings:
            create_sinusoidal_embeddings(N_MAX_POSITIONS, self.dim, out=self.position_embeddings.weight)
        if params.n_langs > 1 and self.use_lang_emb:
            self.lang_embeddings = Embedding(self.n_langs, self.dim)
        self.embeddings = Embedding(self.n_words, self.dim, padding_idx=self.pad_index)
        self.layer_norm_emb = nn.LayerNorm(self.dim, eps=1e-12)

        # transformer layers
        self.attentions = nn.ModuleList()
        self.layer_norm1 = nn.ModuleList()
        self.ffns = nn.ModuleList()
        self.layer_norm2 = nn.ModuleList()
        if self.is_decoder:
            self.layer_norm15 = nn.ModuleList()
            self.encoder_attn = nn.ModuleList()

        # memories
        self.memories = nn.ModuleDict()
        if getattr(params, 'use_memory', False):
            mem_positions = params.mem_enc_positions if is_encoder else params.mem_dec_positions
            for layer_id, pos in mem_positions:
                assert 0 <= layer_id <= params.n_layers - 1
                assert pos in ['in', 'after']
                self.memories['%i_%s' % (layer_id, pos)] = HashingMemory.build(self.dim, self.dim, params)

        for layer_id in range(self.n_layers):
            self.attentions.append(MultiHeadAttention(self.n_heads, self.dim, dropout=self.attention_dropout))
            self.layer_norm1.append(nn.LayerNorm(self.dim, eps=1e-12))
            if self.is_decoder:
                self.layer_norm15.append(nn.LayerNorm(self.dim, eps=1e-12))
                self.encoder_attn.append(MultiHeadAttention(self.n_heads, self.dim, dropout=self.attention_dropout))
            if ('%i_in' % layer_id) in self.memories:
                self.ffns.append(None)
            else:
                self.ffns.append(TransformerFFN(self.dim, self.hidden_dim, self.dim,
                                                dropout=self.dropout, gelu_activation=params.gelu_activation))
            self.layer_norm2.append(nn.LayerNorm(self.dim, eps=1e-12))

        # output layer
        if self.with_output:
            self.pred_layer = PredLayer(params)
            if params.share_inout_emb:
                self.pred_layer.proj.weight = self.embeddings.weight

        self.use_positional_embedding = params.use_positional_embedding
Example #3
    def __init__(self,
                 wavenumbers=None,
                 param_file=None,
                 dtype=torch.float64,
                 device='cuda'):
        """
        wavenumbers: torch tensor, emissivity is evaluated at each wavenumber passed
        n_freqs: int, number of resonant frequencies in the system
        """
        super().__init__()

        if wavenumbers is None or param_file is None:
            print(
                'must initialize InfraRenderProject with wavenumbers and param file'
            )
            return

        # setup convolutional layers
        self.device = device
        self.dtype = dtype
        self.wavenumbers = wavenumbers
        self.mixture_model = InverseRenderMixtureModel(paramFile=param_file,
                                                       wavenumbers=wavenumbers,
                                                       dtype=dtype,
                                                       device=device)
        self.conv1 = nn.Conv1d(1, 3, 5)
        self.conv2 = nn.Conv1d(3, 6, 4)
        self.conv3 = nn.Conv1d(6, 12, 5)
        self.conv4 = nn.Conv1d(12, 24, 4)
        self.pool = nn.MaxPool1d(2)

        # use dummy to compute size of convolutional layer output
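        # (self.convolutions is presumably a helper defined elsewhere in this class
        #  that runs the input through conv1..conv4 and the pooling layer)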
        with torch.no_grad():
            dummy = torch.empty((1, wavenumbers.shape[0]))
            dummy = self.convolutions(dummy)
            dummy = torch.flatten(dummy, start_dim=1)

        self.fc1 = nn.Linear(dummy.shape[1], 150)
        self.fc2 = nn.Linear(150, 150)

        # setup fully connected layers and renderer
        self.fc_freqs_dict = nn.ModuleDict()
        self.fc_gammas_dict = nn.ModuleDict()
        self.fc_rhos_dict = nn.ModuleDict()
        self.fc_epsilon_dict = nn.ModuleDict()
        self.fc_mode_weight_dict = nn.ModuleDict()

        for key, endmember in self.mixture_model.endmemberModels.items():
            self.fc_freqs_dict[key] = nn.ModuleList()
            self.fc_gammas_dict[key] = nn.ModuleList()
            self.fc_rhos_dict[key] = nn.ModuleList()
            self.fc_epsilon_dict[key] = nn.ModuleList()
            self.fc_mode_weight_dict[key] = nn.ModuleList()
            for mode_idx, mode in enumerate(endmember.modes):
                self.fc_freqs_dict[key].append(
                    nn.Linear(150, mode.freqs.shape[0]))
                self.fc_gammas_dict[key].append(
                    nn.Linear(150, mode.gammas.shape[0]))
                self.fc_rhos_dict[key].append(
                    nn.Linear(150, mode.rhos.shape[0]))
                self.fc_epsilon_dict[key].append(nn.Linear(150, 1))
                self.fc_mode_weight_dict[key].append(nn.Linear(150, 1))

        self.fc_abundances = nn.Linear(
            150, len(self.mixture_model.endmemberModels))
        self.endmemberSpectra = None
        self.abundances = None
        self.pred_spectra = None
        self.mse = torch.nn.MSELoss(reduction='mean')
Example #4
    def __init__(self, in_size, out_size, etypes):
        super(HeteroRGCNLayer, self).__init__()
        # W_r for each relation
        self.weight = nn.ModuleDict({
            name: nn.Linear(in_size, out_size) for name in etypes
        })
Example #5
def create_task(args, entity_symbols=None, slice_datasets=None):
    """Returns an EmmentalTask for named entity disambiguation (NED).

    Args:
        args: args
        entity_symbols: entity symbols (default None)
        slice_datasets: slice datasets used in scorer (default None)

    Returns: EmmentalTask for NED
    """

    if entity_symbols is None:
        entity_symbols = EntitySymbols.load_from_cache(
            load_dir=os.path.join(args.data_config.entity_dir,
                                  args.data_config.entity_map_dir),
            alias_cand_map_file=args.data_config.alias_cand_map,
            alias_idx_file=args.data_config.alias_idx_map,
        )

    # Create sentence encoder
    bert_model = BertEncoder(args.data_config.word_embedding,
                             output_size=args.model_config.hidden_size)

    # Gets the tasks that query for the individual embeddings (e.g., word, entity, type, kg)
    # The device dict will store which embedding modules we want on the cpu
    (
        embedding_task_flows,  # task flows for standard embeddings (e.g., kg, type, entity)
        embedding_module_pool,  # module for standard embeddings
        embedding_module_device_dict,  # module device dict for standard embeddings
        # some embeddings output indices for BERT so we handle these embeddings in our BERT layer
        # (see comments in get_through_bert_embedding_tasks)
        extra_bert_embedding_layers,
        embedding_payload_inputs,  # the layers that are fed into the payload
        embedding_total_sizes,  # total size of all embeddings
    ) = get_embedding_tasks(args, entity_symbols)

    # Add the extra embedding layers to BERT module
    for emb_obj in extra_bert_embedding_layers:
        bert_model.add_embedding(emb_obj)

    # Create the embedding payload, attention network, and prediction layer modules
    if args.model_config.attn_class == "BootlegM2E":
        embedding_payload = EmbeddingPayload(args, entity_symbols,
                                             embedding_total_sizes)
        attn_network = BootlegM2E(args, entity_symbols)
        pred_layer = PredictionLayer(args)

    elif args.model_config.attn_class == "Bootleg":
        embedding_payload = EmbeddingPayload(args, entity_symbols,
                                             embedding_total_sizes)
        attn_network = Bootleg(args, entity_symbols)
        pred_layer = PredictionLayer(args)

    elif args.model_config.attn_class == "BERTNED":
        # Baseline model
        embedding_payload = EmbeddingPayloadBase(args, entity_symbols,
                                                 embedding_total_sizes)
        attn_network = BERTNED(args, entity_symbols)
        pred_layer = NoopPredictionLayer(args)

    else:
        raise ValueError(f"{args.model_config.attn_class} is not supported.")

    sliced_scorer = BootlegSlicedScorer(args.data_config.train_in_candidates,
                                        slice_datasets)

    # Create module pool and combine with embedding module pool
    module_pool = nn.ModuleDict({
        BERT_MODEL_NAME: bert_model,
        "embedding_payload": embedding_payload,
        "attn_network": attn_network,
        PRED_LAYER: pred_layer,
    })
    module_pool.update(embedding_module_pool)

    # Create task flow
    task_flow = [
        {
            "name": BERT_MODEL_NAME,
            "module": BERT_MODEL_NAME,
            "inputs": [
                ("_input_", "entity_cand_eid"),
                ("_input_", "token_ids"),
            ],  # We pass the entity_cand_eids to BERT in case of embeddings that require word information
        },
        *embedding_task_flows,  # Add task flows to create embedding inputs
        {
            "name":
            "embedding_payload",
            "module":
            "embedding_payload",  # outputs: embedding_tensor
            "inputs": [
                ("_input_", "start_span_idx"),
                ("_input_", "end_span_idx"),
                *embedding_payload_inputs,  # all embeddings
            ],
        },
        {
            "name":
            "attn_network",
            "module":
            "attn_network",  # output: predictions from layers, output entity embeddings
            "inputs": [
                (BERT_MODEL_NAME, 0),  # sentence embedding
                (BERT_MODEL_NAME, 1),  # sentence embedding mask
                ("embedding_payload", 0),
                ("_input_", "entity_cand_eid_mask"),
                ("_input_", "start_span_idx"),
                ("_input_", "end_span_idx"),
                (
                    "_input_",
                    "batch_on_the_fly_kg_adj",
                ),  # special kg adjacency embedding prepped in dataloader
            ],
        },
        {
            "name":
            PRED_LAYER,
            "module":
            PRED_LAYER,
            "inputs": [
                (
                    "attn_network",
                    "intermed_scores",
                ),  # output predictions from intermediate layers from the model
                (
                    "attn_network",
                    "ent_embs",
                ),  # output entity embeddings (from all KG modules)
                (
                    "attn_network",
                    "final_scores",
                ),  # score (empty except for baseline model)
            ],
        },
    ]

    return EmmentalTask(
        name=NED_TASK,
        module_pool=module_pool,
        task_flow=task_flow,
        loss_func=disambig_loss,
        output_func=disambig_output,
        require_prob_for_eval=False,
        require_pred_for_eval=True,
        # action_outputs are used to stitch together sentence fragments
        action_outputs=[
            ("_input_", "sent_idx"),
            ("_input_", "subsent_idx"),
            ("_input_", "alias_orig_list_pos"),
            ("_input_", "for_dump_gold_cand_K_idx_train"),
            (PRED_LAYER, "ent_embs"),  # entity embeddings
        ],
        scorer=Scorer(customize_metric_funcs={
            f"{NED_TASK}_scorer": sliced_scorer.bootleg_score
        }),
        module_device=embedding_module_device_dict,
    )
Example #6
    def _rebuild_module_dict(self):
        self.nets = nn.ModuleDict(self._nets)
    def __init__(
            self,
            n_nets=3,
            # default scale ratio: 2
            #
            #    EXAMPLE with three modules and the last with input shape = (64, 64, 64)
            #    First net input  (256, 256, 256) -> downsample -> (64, 64, 64)
            #    Second net input (128, 128, 128) -> downsample -> (64, 64, 64)
            #    Third net input  (64, 64, 64)
            #
            #    i.e. scale_ratio=2, n_nets=3: 256 -> 128 -> 64
            #         scale_ratio=4, n_nets=2: 256 -> 64
            scale_ratio=2,
            module_shape=(64, 64, 64),
            input_shapes=None,
            initial_ds=None,
            crop_and_ds_inputs=False,
            crop_and_us_outputs=True,
            in_channels=1,
            out_channels=None,
            num_inputs=1,
            filter_sizes_down=(((4, 8), (8, 16), (16, 32)), ((8, 16), (16, 32),
                                                             (32, 64)),
                               ((32, 64), (64, 128), (128, 256))),
            filter_sizes_bottleneck=((32, 64), (64, 128), (256, 512)),
            filter_sizes_up=(((32, 32), (16, 16), (8, 8)), ((64, 64), (32, 32),
                                                            (16, 16)),
                             ((256, 256), (128, 128), (64, 64))),
            batch_norm=True,
            output_activation='softmax',
            verbose=False):

        super(cNnet, self).__init__()

        # ______________________________
        # Parameters and settings

        # Assertions
        assert out_channels is not None, 'The number of output classes for each module needs to be specified!'
        assert in_channels is not None, 'The number of input channels needs to be specified!'
        assert len(filter_sizes_down) == n_nets
        assert len(filter_sizes_up) == n_nets
        assert len(filter_sizes_bottleneck) == n_nets

        # ______________________________
        # Define layers

        self.avg_pool_inputs = nn.ModuleDict()
        self.unets = nn.ModuleList()

        for net_idx in range(n_nets):

            if crop_and_ds_inputs:

                if 0 < net_idx < n_nets - 1:

                    # First, the tensor will be cropped
                    # and then downsampled
                    # ModuleDict keys must be strings
                    self.avg_pool_inputs[str(net_idx)] = nn.AvgPool3d(
                        kernel_size=(scale_ratio ** (n_nets - net_idx - 1),) * 3)

            self.unets.append(
                Unet(num_classes=out_channels[net_idx],
                     in_channels=in_channels + out_channels[net_idx],
                     filter_sizes_down=filter_sizes_down[net_idx],
                     filter_sizes_up=filter_sizes_up[net_idx],
                     filter_sizes_bottleneck=filter_sizes_bottleneck[net_idx],  # indexed per net, like the down/up filter sizes
                     kernel_size=3,
                     batch_norm=batch_norm,
                     ndims=3,
                     return_last_upsampling=True,
                     output_activation=output_activation))
Example #8
File: darts.py  Project: penghouwen/nni
    def __init__(self, layer_choice):
        super(DartsLayerChoice, self).__init__()
        self.name = layer_choice.label
        self.op_choices = nn.ModuleDict(OrderedDict([(name, layer_choice[name]) for name in layer_choice.names]))
        self.alpha = nn.Parameter(torch.randn(len(self.op_choices)) * 1e-3)
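        # one architecture weight per candidate op; in DARTS these are typically
        # softmax-normalized in forward() to mix the candidate outputs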
Example #9
    def __init__(self,
                 input_list,
                 output_info,
                 fusion_lists,
                 nonlinearity=nn.ReLU()):
        super(VIN, self).__init__()
        self.input_list = input_list
        # This output_info is only for the level0 outputs, i.e., outputs from each input type separately
        self.output_info = output_info
        self.fusion_lists = fusion_lists

        self.num_inputs = len(input_list)
        self.num_levels = len(fusion_lists)
        if isinstance(output_info, dict):
            self.num_targets = 1
        elif isinstance(output_info, list):
            self.num_targets = len(output_info)
            assert self.num_targets > 1
        else:
            raise ValueError(
                f'output_info must be either a dict (one target) or a list (multiple targets), '
                f'but is {type(output_info)}')

        self.weights = nn.ParameterDict()
        self.layers = nn.ModuleDict()
        # Embed discrete variables with nn.Embedding
        self.input_embeddings = nn.ModuleDict()
        # self.repr_dims stores the dimensions of the learned representations from each level;
        # it will be used when combining all the learned representations
        self.repr_dims = [[]]
        # self.repr_locations is only used when fusing repr_list with fusion_type='repr-loss-avg':
        # a level can have more outputs than latent representations (each output is associated with a loss),
        # so len(self.weights[f'fusion{level}[_target{t}]_loss_weight']) can be bigger than len(repr_list);
        # self.repr_locations records the correspondence between loss weights and view weights (by index)
        self.repr_locations = [[]]

        # provide default parameters for in_dict
        default_dict = {
            'padding_idx': 0,
            'max_norm': 1,
            'norm_type': 2,
            'scale_grad_by_freq': True,
            'last_nonlinearity': False,
            'bias': False,
            'dense': True,
            'residual': False,
            'residual_layers': 'all'
        }
        for i, in_dict in enumerate(input_list):
            # This is used to produce the learned vector representations from all the input data types individually
            in_dim = in_dict['in_dim']
            in_type = in_dict['in_type']
            hidden_dim = in_dict['hidden_dim']  # hidden_dim is a list
            self.repr_dims[0].append(hidden_dim[-1])
            self.repr_locations[0].append(i)
            # in case in_dict does not contain all required keys, append them from default values
            append_dict(in_dict, default_dict)
            if in_type == 'discrete':
                # If padding_idx=0 (for missing values), the index for a discrete variable should start from 1
                self.input_embeddings[str(i)] = torch.nn.Embedding(
                    num_embeddings=in_dim
                    if in_dict['padding_idx'] is None else in_dim + 1,
                    embedding_dim=in_dict['embedding_dim'],
                    padding_idx=in_dict['padding_idx'],
                    max_norm=in_dict['max_norm'],
                    norm_type=in_dict['norm_type'],
                    scale_grad_by_freq=in_dict['scale_grad_by_freq'],
                    sparse=False,
                    _weight=None)
                in_dim = in_dict['embedding_dim']
            else:
                assert in_type == 'continuous', (
                    f'Currently only discrete or continuous input types are handled, '
                    f'but the {i}th in_type is {in_type}!')
            self.layers[f'input{i}_hidden_layers'] = DenseLinear(
                in_dim=in_dim,
                hidden_dim=hidden_dim,
                nonlinearity=nonlinearity,
                last_nonlinearity=in_dict['last_nonlinearity'],
                bias=in_dict['bias'],
                dense=in_dict['dense'],
                residual=in_dict['residual'],
                residual_layers=in_dict['residual_layers'],
                forward_input=False,
                return_all=False,
                return_layers=None,
                return_list=False)

        # provide default parameters for output_info and fusion_lists
        default_dict = {
            'last_nonlinearity': False,
            'bias': False,
            'dense': True,
            'residual': False,
            'residual_layers': 'all-but-last'
        }
        if self.num_targets == 1:
            # Generate level0 outputs from each input using their high-level representations with DenseLinear model;
            # For code simplicity, make output_layers from all views have the same hidden_dim
            # output_info is a dictionary
            hidden_dim = output_info['hidden_dim']
            self.out_dim = hidden_dim[-1]
            append_dict(
                output_info, default_dict
            )  # provide default values in case they are missing in output_info
            for i, in_dim in enumerate(self.repr_dims[0]):
                # For coding simplicity, the output layers from all views will have the same hidden_dim
                self.layers[f'input{i}_output_layers'] = DenseLinear(
                    in_dim=in_dim,
                    hidden_dim=hidden_dim,
                    nonlinearity=nonlinearity,
                    last_nonlinearity=output_info['last_nonlinearity'],
                    bias=output_info['bias'],
                    dense=output_info['dense'],
                    residual=output_info['residual'],
                    residual_layers=output_info['residual_layers'],
                    forward_input=False,
                    return_all=False,
                    return_layers=None,
                    return_list=False)
        else:  # self.num_targets > 1
            # For each target, generate level0 outputs from each input
            # using their high-level representations with DenseLinear model;
            self.out_dims = []
            # output_info is a list of dictionaries
            # self.num_targets == len(output_info)
            for j, out_dict in enumerate(output_info):
                hidden_dim = out_dict['hidden_dim']  # it is a list
                self.out_dims.append(hidden_dim[-1])
                append_dict(
                    out_dict, default_dict
                )  # provide default values in case they are missing in out_dict
                for i, in_dim in enumerate(self.repr_dims[0]):
                    # For each target, compute an output from each input type
                    self.layers[
                        f'input{i}_target{j}_output_layers'] = DenseLinear(
                            in_dim=in_dim,
                            hidden_dim=hidden_dim,
                            nonlinearity=nonlinearity,
                            last_nonlinearity=out_dict['last_nonlinearity'],
                            bias=out_dict['bias'],
                            dense=out_dict['dense'],
                            residual=out_dict['residual'],
                            residual_layers=out_dict['residual_layers'],
                            forward_input=False,
                            return_all=False,
                            return_layers=None,
                            return_list=False)

        for level, fusion_list in enumerate(fusion_lists):
            num_outputs = self.num_inputs if level == 0 else len(
                fusion_lists[level - 1])
            # loss weight at each level
            if self.num_targets == 1:
                self.weights[f'fusion{level}_loss_weight'] = nn.Parameter(
                    torch.empty(num_outputs), requires_grad=True)
                nn.init.constant_(self.weights[f'fusion{level}_loss_weight'],
                                  1.)
            else:
                for t in range(self.num_targets):
                    self.weights[
                        f'fusion{level}_target{t}_loss_weight'] = nn.Parameter(
                            torch.empty(num_outputs), requires_grad=True)
                    nn.init.constant_(
                        self.weights[f'fusion{level}_target{t}_loss_weight'],
                        1.)
            new_repr_dim = []
            new_repr_location = []
            for i, fusion_dict in enumerate(fusion_list):
                fusion_type = fusion_dict['fusion_type']
                append_dict(
                    fusion_dict, default_dict
                )  # provide default values in case they are missing in fusion_dict
                if fusion_type.startswith('repr'):
                    # learn a new hidden representations from fused representations
                    # prepare in_dim
                    if re.search('avg', fusion_type):
                        for d in self.repr_dims[-1]:
                            assert d == self.repr_dims[-1][0]
                        in_dim = self.repr_dims[-1][0]
                    elif re.search('cat', fusion_type):
                        in_dim = sum(self.repr_dims[-1])
                    elif re.search('repr[0-9]', fusion_type):
                        in_dim = self.repr_dims[-1][int(
                            fusion_type[4:]
                        )]  # here, in most cases, fusion_type='repr0'
                    else:
                        raise ValueError(
                            f'fusion_type={fusion_type} starting with repr must be repr0 '
                            f'or contain either avg or cat')
                    hidden_dim = fusion_dict['hidden_dim']  # a list of ints
                    if re.search('_repr', fusion_type):
                        new_repr_dim.append(hidden_dim[-1])
                        new_repr_location.append(i)
                    self.layers[
                        f'fusion{level}-{i}_hidden_layers'] = DenseLinear(
                            in_dim,
                            hidden_dim,
                            nonlinearity=nonlinearity,
                            last_nonlinearity=fusion_dict['last_nonlinearity'],
                            bias=fusion_dict['bias'],
                            dense=fusion_dict['dense'],
                            residual=fusion_dict['residual'],
                            residual_layers=fusion_dict['residual_layers'],
                            forward_input=False,
                            return_all=False,
                            return_layers=None,
                            return_list=False)

                    # output_info is a dictionary if self.num_targets==1 else a list of dictionaries
                    output_info = fusion_dict['output_info']
                    if self.num_targets == 1:
                        append_dict(
                            output_info, default_dict
                        )  # provide default values in case they are missing in output_info
                        # initialize view weights
                        if fusion_type.startswith('repr-weighted-avg'):
                            self.weights[
                                f'fusion{level}_view_weight'] = nn.Parameter(
                                    torch.empty(len(self.repr_dims[-1])),
                                    requires_grad=True)
                            nn.init.constant_(
                                self.weights[f'fusion{level}_view_weight'], 1.)
                        self.layers[
                            f'fusion{level}-{i}_output_layers'] = DenseLinear(
                                in_dim=hidden_dim[-1],
                                hidden_dim=output_info['hidden_dim'],
                                nonlinearity=nonlinearity,
                                last_nonlinearity=output_info[
                                    'last_nonlinearity'],
                                bias=output_info['bias'],
                                dense=output_info['dense'],
                                residual=output_info['residual'],
                                residual_layers=output_info['residual_layers'],
                                forward_input=False,
                                return_all=False,
                                return_layers=None,
                                return_list=False)
                    else:
                        if fusion_type.startswith('repr-weighted-avg'):
                            for t in range(self.num_targets):
                                self.weights[
                                    f'fusion{level}_target{t}_view_weight'] = nn.Parameter(
                                        torch.empty(len(self.repr_dims[-1])),
                                        requires_grad=True)
                                nn.init.constant_(
                                    self.weights[
                                        f'fusion{level}_target{t}_view_weight'],
                                    1.)
                        for t, out_dict in enumerate(output_info):
                            append_dict(
                                out_dict, default_dict
                            )  # provide default values in case they are missing in out_dict
                            self.layers[
                                f'fusion{level}-{i}_target{t}_output_layers'] = DenseLinear(
                                    in_dim=hidden_dim[-1],
                                    hidden_dim=out_dict['hidden_dim'],
                                    nonlinearity=nonlinearity,
                                    last_nonlinearity=out_dict[
                                        'last_nonlinearity'],
                                    bias=out_dict['bias'],
                                    dense=out_dict['dense'],
                                    residual=out_dict['residual'],
                                    residual_layers=out_dict[
                                        'residual_layers'],
                                    forward_input=False,
                                    return_all=False,
                                    return_layers=None,
                                    return_list=False)
                elif fusion_type.startswith('out'):
                    if self.num_targets == 1:
                        if fusion_type == 'out-weighted-avg':
                            self.weights[
                                f'fusion{level}_out_weight'] = nn.Parameter(
                                    torch.empty(num_outputs),
                                    requires_grad=True)
                            nn.init.constant_(
                                self.weights[f'fusion{level}_out_weight'], 1.)
                    else:
                        if fusion_type.startswith('out-weighted-avg'):
                            for t in range(self.num_targets):
                                self.weights[
                                    f'fusion{level}_target{t}_out_weight'] = nn.Parameter(
                                        torch.empty(num_outputs),
                                        requires_grad=True)
                                nn.init.constant_(
                                    self.weights[
                                        f'fusion{level}_target{t}_out_weight'],
                                    1.)
                else:
                    raise ValueError(
                        f'fusion_type must start with repr or out, but is {fusion_type}'
                    )
            self.repr_dims.append(new_repr_dim)
            self.repr_locations.append(new_repr_location)
        # the loss weight for the last level; rarely used, mainly kept to avoid an error in get_vin_loss
        if self.num_targets == 1:
            self.weights[
                f'fusion{self.num_levels}_loss_weight'] = nn.Parameter(
                    torch.empty(len(fusion_lists[-1])), requires_grad=True)
            nn.init.constant_(
                self.weights[f'fusion{self.num_levels}_loss_weight'], 1.)
        else:
            for t in range(self.num_targets):
                self.weights[
                    f'fusion{self.num_levels}_target{t}_loss_weight'] = nn.Parameter(
                        torch.empty(len(fusion_lists[-1])), requires_grad=True)
                nn.init.constant_(
                    self.
                    weights[f'fusion{self.num_levels}_target{t}_loss_weight'],
                    1.)
Example #10
    def __init__(self, dataset, n_layers, in_channels=3, channels=16, n_nodes=4, retrain=False, shared_modules=None):
        super().__init__()
        assert dataset in ["cifar10", "imagenet"]
        self.dataset = dataset
        self.input_size = 32 if dataset == "cifar10" else 224  # dataset is asserted to be "cifar10" or "imagenet"
        self.in_channels = in_channels
        self.channels = channels
        self.n_nodes = n_nodes
        self.aux_size = {2 * n_layers // 3: self.input_size // 4}
        if dataset == "cifar10":
            self.n_classes = 10
            self.aux_head_class = AuxiliaryHeadCIFAR if retrain else DistillHeadCIFAR
            if not retrain:
                self.aux_size = {n_layers // 3: 6, 2 * n_layers // 3: 6}
        elif dataset == "imagenet":
            self.n_classes = 1000
            self.aux_head_class = AuxiliaryHeadImageNet if retrain else DistillHeadImagenet
            if not retrain:
                self.aux_size = {n_layers // 3: 6, 2 * n_layers // 3: 5}
        self.n_layers = n_layers
        self.aux_head = nn.ModuleDict()
        self.ensemble_param = nn.Parameter(torch.rand(len(self.aux_size) + 1) / (len(self.aux_size) + 1)) \
            if not retrain else None

        stem_multiplier = 3 if dataset == "cifar10" else 1
        c_cur = stem_multiplier * self.channels
        self.shared_modules = {}  # do not wrap with ModuleDict
        if shared_modules is not None:
            self.stem = shared_modules["stem"]
        else:
            self.stem = nn.Sequential(
                nn.Conv2d(in_channels, c_cur, 3, 1, 1, bias=False),
                nn.BatchNorm2d(c_cur)
            )
            self.shared_modules["stem"] = self.stem

        # for the first cell, stem is used for both s0 and s1
        # [!] channels_pp and channels_p is output channel size, but c_cur is input channel size.
        channels_pp, channels_p, c_cur = c_cur, c_cur, channels

        self.cells = nn.ModuleList()
        reduction_p, reduction = False, False
        aux_head_count = 0
        for i in range(n_layers):
            reduction_p, reduction = reduction, False
            if i in [n_layers // 3, 2 * n_layers // 3]:
                c_cur *= 2
                reduction = True

            cell = Cell(n_nodes, channels_pp, channels_p, c_cur, reduction_p, reduction)
            self.cells.append(cell)
            c_cur_out = c_cur * n_nodes
            if i in self.aux_size:
                if shared_modules is not None:
                    self.aux_head[str(i)] = shared_modules["aux" + str(aux_head_count)]
                else:
                    self.aux_head[str(i)] = self.aux_head_class(c_cur_out, self.aux_size[i], self.n_classes)
                    self.shared_modules["aux" + str(aux_head_count)] = self.aux_head[str(i)]
                aux_head_count += 1
            channels_pp, channels_p = channels_p, c_cur_out

        self.gap = nn.AdaptiveAvgPool2d(1)
        self.linear = nn.Linear(channels_p, self.n_classes)
Example #11
# 4.1.2.2 The ModuleList class

net = nn.ModuleList([nn.Linear(784, 256), nn.ReLU()])
net.append(nn.Linear(256, 10))  # append works like a Python list
print(net[-1])  # index access works like a Python list
print(net)
# net(torch.zeros(1, 784))  # would raise NotImplementedError
# The reason for the error:
# ModuleList is just a list that stores modules; the modules in it are not connected to each other and have no ordering,
# so adjacent layers' input/output dimensions are not required to match, and forward() is not implemented, hence the error

# The ModuleDict class
# ModuleDict takes a dictionary of submodules as input; entries can then be added and accessed like a regular dict:
net = nn.ModuleDict({
    'linear': nn.Linear(784, 256),
    'act': nn.ReLU(),
})
net['output'] = nn.Linear(256, 10)  # add a new entry
print(net['linear'])  # access by key
print(net.output)
print(net)

# net(torch.zeros(1, 784))  # would raise NotImplementedError
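
# A minimal sketch of the analogous fix for ModuleDict: the containing module's
# forward() decides which entries to call and in what order.
class DictMLP(nn.Module):
    def __init__(self):
        super(DictMLP, self).__init__()
        self.blocks = nn.ModuleDict({
            'linear': nn.Linear(784, 256),
            'act': nn.ReLU(),
            'output': nn.Linear(256, 10),
        })

    def forward(self, x):
        return self.blocks['output'](self.blocks['act'](self.blocks['linear'](x)))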


# 4.1.3 Constructing more complex models
class FancyMLP(nn.Module):
    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)

        self.rand_weight = torch.rand((20, 20), requires_grad=False)  # constant parameter, not trained
Example #12
    def __init__(self, normalize=False):
        super().__init__()
        self.normalize = normalize
        self.losses = nn.ModuleDict()
        self.weights = {}
        self.values = {}

    def __init__(self, global_args, network_args, loss_func):
        super(NetworkABC, self).__init__()
        self.global_args = global_args
        self.network_args = network_args
        self.loss_func = loss_func
        self.net = nn.ModuleDict()
Example #14
    def __init__(self,
                 args,
                 generator,
                 discriminator,
                 gen_optim,
                 disc_optim,
                 train_loader,
                 val_loader,
                 loss_funcs,
                 gen_scheduler=None,
                 disc_scheduler=None):

        self.logger = get_logger(name=__name__,
                                 save_file=args.log_path / args.run_name)

        # Checking whether inputs are correct.
        assert isinstance(generator, nn.Module) and isinstance(discriminator, nn.Module), \
            '`generator` and `discriminator` must be Pytorch Modules.'
        assert isinstance(gen_optim, optim.Optimizer) and isinstance(disc_optim, optim.Optimizer), \
            '`gen_optim` and `disc_optim` must be Pytorch Optimizers.'
        assert isinstance(train_loader, DataLoader) and isinstance(val_loader, DataLoader), \
            '`train_loader` and `val_loader` must be Pytorch DataLoader objects.'

        loss_funcs = nn.ModuleDict(
            loss_funcs
        )  # Expected to be a dictionary with names and loss functions.

        if gen_scheduler is not None:
            if isinstance(gen_scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                self.metric_gen_scheduler = True
            elif isinstance(gen_scheduler, optim.lr_scheduler._LRScheduler):
                self.metric_gen_scheduler = False
            else:
                raise TypeError(
                    '`gen_scheduler` must be a Pytorch Learning Rate Scheduler.'
                )

        if disc_scheduler is not None:
            if isinstance(disc_scheduler,
                          optim.lr_scheduler.ReduceLROnPlateau):
                self.metric_disc_scheduler = True
            elif isinstance(disc_scheduler, optim.lr_scheduler._LRScheduler):
                self.metric_disc_scheduler = False
            else:
                raise TypeError(
                    '`disc_scheduler` must be a Pytorch Learning Rate Scheduler.'
                )

        self.generator = generator
        self.discriminator = discriminator
        self.gen_optim = gen_optim
        self.disc_optim = disc_optim
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.loss_funcs = loss_funcs
        self.gen_scheduler = gen_scheduler
        self.disc_scheduler = disc_scheduler
        self.device = args.device
        self.verbose = args.verbose
        self.num_epochs = args.num_epochs
        self.writer = SummaryWriter(str(args.log_path))

        self.recon_lambda = torch.tensor(args.recon_lambda,
                                         dtype=torch.float32,
                                         device=args.device)

        # This will work best if batch size is 1, as is recommended. I don't know whether this generalizes.
        self.target_real = torch.tensor(1,
                                        dtype=torch.float32,
                                        device=args.device)
        self.target_fake = torch.tensor(0,
                                        dtype=torch.float32,
                                        device=args.device)

        # Display interval of 0 means no display of validation images on TensorBoard.
        if args.max_images <= 0:
            self.display_interval = 0
        else:
            self.display_interval = int(
                len(self.val_loader.dataset) //
                (args.max_images * args.batch_size))

        self.generator_checkpoint_manager = CheckpointManager(
            model=self.generator,
            optimizer=self.gen_optim,
            mode='min',
            save_best_only=args.save_best_only,
            ckpt_dir=args.ckpt_path / 'Generator',
            max_to_keep=args.max_to_keep)

        self.discriminator_checkpoint_manager = CheckpointManager(
            model=self.discriminator,
            optimizer=self.disc_optim,
            mode='min',
            save_best_only=args.save_best_only,
            ckpt_dir=args.ckpt_path / 'Discriminator',
            max_to_keep=args.max_to_keep)

        # loading from checkpoint if specified.
        if vars(args).get('gen_prev_model_ckpt'):
            self.generator_checkpoint_manager.load(
                load_dir=args.gen_prev_model_ckpt, load_optimizer=False)

        if vars(args).get('disc_prev_model_ckpt'):
            self.discriminator_checkpoint_manager.load(
                load_dir=args.disc_prev_model_ckpt, load_optimizer=False)
Example #15
    def __init__(self,
                 util_e,
                 high_order_utils=[],
                 prior_flag=False,
                 sizes=[],
                 size_flag=False,
                 size_force=False,
                 pairwise_flag=True,
                 unary_flag=True,
                 self_flag=True):
        super(Atten, self).__init__()

        self.util_e = util_e

        self.prior_flag = prior_flag

        self.n_utils = len(util_e)

        self.spatial_pool = nn.ModuleDict()

        self.un_models = nn.ModuleList()

        self.self_flag = self_flag
        self.pairwise_flag = pairwise_flag
        self.unary_flag = unary_flag
        self.size_flag = size_flag
        self.size_force = size_force
        if not self.size_flag:
            sizes = [None for _ in util_e]
        self.high_order_utils = high_order_utils
        self.high_order_set = set([h[0] for h in self.high_order_utils])

        for idx, e_dim in enumerate(util_e):
            self.un_models.append(Unary(e_dim))
            if self.size_force:
                self.spatial_pool[str(idx)] = nn.AdaptiveAvgPool1d(sizes[idx])

        self.pp_models = nn.ModuleDict()
        for ((idx1, e_dim_1), (idx2, e_dim_2)) \
                in combinations_with_replacement(enumerate(util_e), 2):
            if idx1 == idx2:
                self.pp_models[str(idx1)] = Pairwise(e_dim_1, sizes[idx1])
            else:
                if pairwise_flag:
                    for i, num_utils, connected_list in self.high_order_utils:
                        if i == idx1 and idx2 not in set(connected_list) \
                                or idx2 == i and idx1 not in set(connected_list):
                            continue
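                    # note: the `continue` above only skips an iteration of the inner loop,
                    # so the Pairwise module below is still created for every (idx1, idx2) pair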
                    self.pp_models[str(
                        (idx1, idx2))] = Pairwise(e_dim_1, sizes[idx1],
                                                  e_dim_2, sizes[idx2])

        self.reduce_potentials = nn.ModuleList()
        self.num_of_potentials = dict()

        self.default_num_of_potentials = 0

        if self.self_flag:
            self.default_num_of_potentials += 1
        if self.unary_flag:
            self.default_num_of_potentials += 1
        if self.prior_flag:
            self.default_num_of_potentials += 1
        for idx in range(self.n_utils):
            self.num_of_potentials[idx] = self.default_num_of_potentials
        # All other utils
        if pairwise_flag:
            for idx, num_utils, connected_utils in high_order_utils:
                for c_u in connected_utils:
                    self.num_of_potentials[c_u] += num_utils
                    self.num_of_potentials[idx] += 1
            for k in self.num_of_potentials.keys():
                if k not in self.high_order_set:
                    self.num_of_potentials[k] += (self.n_utils -
                                                  1) - len(high_order_utils)

        for idx in range(self.n_utils):
            self.reduce_potentials.append(
                nn.Conv1d(self.num_of_potentials[idx], 1, 1, bias=False))
Example #16
    def __init__(self, models: OrderedDict):
        super(CombinedModel, self).__init__()
        self.semantic_groups = OrderedDict()
        self.model_list = nn.ModuleDict(models)
        self.semantic_groups = {g: models[g].output_semantics for g in models}
Example #17
    def __init__(self, layer,
                 filter_multiplier, block_multiplier, steps, scale, search_space,
                 ppc=None, pc=None, affine=True):
        super(GumbelCell, self).__init__()

        # todo add new attribute, affine parameter for bn, making searching phase more stable
        self.affine = affine
        # todo add new attribute, for debugging
        self.layer = layer
        # change index2scale to index2channel
        # indices -2 and -1 are set by default
        # indices 0, 1, 2, 3 are calculated by int(filter_multiplier * block_multiplier * scale / 4)
        self.index2scale = {
            0: 4,
            1: 8,
            2: 16,
            3: 32,
        }
        self.index2channel = {
            0: int(filter_multiplier * block_multiplier * self.index2scale[0] / 4),
            1: int(filter_multiplier * block_multiplier * self.index2scale[1] / 4),
            2: int(filter_multiplier * block_multiplier * self.index2scale[2] / 4),
            3: int(filter_multiplier * block_multiplier * self.index2scale[3] / 4),
        }
        self.steps = steps # nodes within each cell
        # todo add new attribute
        self.total_nodes = 2 + self.steps # exclude output node

        self.filter_multiplier = filter_multiplier
        self.block_multiplier = block_multiplier
        self.scale = scale

        self.search_space = search_space
        if self.search_space == 'autodeeplab':
            self.conv_candidates = autodeeplab
        elif self.search_space == 'proxyless':
            self.conv_candidates = proxyless
        elif self.search_space == 'counter': # used to debug
            self.conv_candidates = counter
        elif self.search_space == 'my_search_space':
            self.conv_candidates = my_search_space
        else:
            raise ValueError('search space {:} is not supported'.format(self.search_space))
        #self.conv_candidates = conv_candidates
        #self.prev_prev_scale = prev_prev_scale
        #self.prev_scale = prev_scale
        self.outc = self.index2channel[self.scale]

        # TODO: do not need prev_prev_scale and prev_scale any more
        # 1. down same up link for prev_feature
        # 2. down same up, double down, and double up link for prev_prev_feature
        # 3. all the link operations are defined in __init__
        # 4. justification in forward() pass, and call the related link operation
        # 5. set prev_feature_channels and prev_prev_feature_channels specifically for output of stem0 and stem1

        # set types of link operation according to self.scale
        if self.scale == 0:
            # only has same and up link for prev_feature
            # only has same, up, and double up link for prev_prev_feature
            self.same_link_prev           = ConvLayer(self.outc if pc is None else pc, self.outc, 1, 1, False, affine=affine)
            self.up_link_prev             = FactorizedIncrease(int(self.outc*2) if pc is None else pc, self.outc, affine=affine)
            self.same_link_prev_prev      = ConvLayer(self.outc if ppc is None else ppc, self.outc, 1, 1, False, affine=affine)
            self.up_link_prev_prev        = FactorizedIncrease(int(self.outc*2) if ppc is None else ppc, self.outc, affine=affine)
            self.double_up_link_prev_prev = DoubleFactorizedIncrease(int(self.outc*4) if ppc is None else ppc, self.outc, affine=affine)
            # has down for prev_prev_feature in layer-0
            self.down_link_prev_prev      = FactorizedReduce(int(self.outc/2) if ppc is None else ppc, self.outc, affine=affine)
        elif self.scale == 1:
            # has down, same, up link for prev_feature
            # has down, same, up, and double up link for prev_prev_feature
            self.down_link_prev             = FactorizedReduce(int(self.outc/2) if pc is None else pc, self.outc, affine=affine)
            self.same_link_prev             = ConvLayer(self.outc if pc is None else pc, self.outc, 1, 1, False, affine=affine)
            self.up_link_prev               = FactorizedIncrease(int(self.outc*2) if pc is None else pc, self.outc, affine=affine)
            self.down_link_prev_prev        = FactorizedReduce(int(self.outc/2) if ppc is None else ppc, self.outc, affine=affine)
            self.same_link_prev_prev        = ConvLayer(self.outc if ppc is None else ppc, self.outc, 1, 1, False, affine=affine)
            self.up_link_prev_prev          = FactorizedIncrease(int(self.outc*2) if ppc is None else ppc, self.outc, affine=affine)
            self.double_up_link_prev_prev   = DoubleFactorizedIncrease(int(self.outc*4) if ppc is None else ppc, self.outc, affine=affine)
            # has double down link for prev_prev_feature
            self.double_down_link_prev_prev = DoubleFactorizedReduce(int(self.outc/4) if ppc is None else ppc, self.outc, affine=affine)
        elif self.scale == 2:
            # has down, same, up link for prev_feature
            # has ddown, same, up link for prev_prev_feature
            self.down_link_prev             = FactorizedReduce(int(self.outc/2) if pc is None else pc, self.outc, affine=affine)
            self.same_link_prev             = ConvLayer(self.outc if pc is None else pc, self.outc, 1, 1, False, affine=affine)
            self.up_link_prev               = FactorizedIncrease(int(self.outc*2) if pc is None else pc, self.outc, affine=affine)
            self.down_link_prev_prev        = FactorizedReduce(int(self.outc/2) if ppc is None else ppc, self.outc, affine=affine)
            self.double_down_link_prev_prev = DoubleFactorizedReduce(int(self.outc/4) if ppc is None else ppc, self.outc, affine=affine)
            self.same_link_prev_prev        = ConvLayer(self.outc if ppc is None else ppc, self.outc, 1, 1, False, affine=affine)
            self.up_link_prev_prev          = FactorizedIncrease(int(self.outc*2) if ppc is None else ppc, self.outc, affine=affine)
        elif self.scale == 3:
            # has down, same link for prev_feature
            # has ddown, down, and same for prev_prev_feature
            self.down_link_prev             = FactorizedReduce(int(self.outc/2) if pc is None else pc, self.outc, affine=affine)
            self.same_link_prev             = ConvLayer(self.outc if pc is None else pc, self.outc, 1, 1, False, affine=affine)
            self.double_down_link_prev_prev = DoubleFactorizedReduce(int(self.outc/4) if ppc is None else ppc, self.outc, affine=affine)
            self.down_link_prev_prev        = FactorizedReduce(int(self.outc/2) if ppc is None else ppc, self.outc, affine=affine)
            self.same_link_prev_prev        = ConvLayer(self.outc if ppc is None else ppc, self.outc, 1, 1, False, affine=affine)
        else:
            raise ValueError('invalid scale value {:}'.format(self.scale))

        # TODO: new attribute nn.ModuleDict()
        self.ops = nn.ModuleDict()
        # i::node_index, j::previous_node_index
        if self.search_space == 'proxyless':
            for i in range(2, self.total_nodes):
                for j in range(i):
                    edge_str = '{:}<-{:}'.format(i, j)
                    #if j == 0 and self.prev_prev_scale is None:  # for prev_prev_cell
                    #    mobile_inverted_conv = None
                    #    shortcut = None
                    #else:
                    mobile_inverted_conv = MixedOp(
                        build_candidate_ops(self.conv_candidates,
                                            in_channels=self.outc, out_channels=self.outc, stride=1,
                                            ops_order='act_weight_bn', affine=self.affine))  # normal MixedOp, ModuleList with weight
                    shortcut = Identity(self.outc, self.outc)
                    #if mobile_inverted_conv is None and shortcut is None:
                    #    inverted_residual_block = None
                    #else:
                    inverted_residual_block = MobileInvertedResidualBlock(mobile_inverted_conv, shortcut)
                    self.ops[edge_str] = inverted_residual_block
        elif self.search_space == 'autodeeplab' or self.search_space == 'my_search_space':
            # TODO: have issue in search space of autodeeplab
            for i in range(2, self.total_nodes):
                for j in range(i):
                    edge_str = '{:}<-{:}'.format(i, j)
                    #if j == 0 and self.prev_prev_scale is None:
                    #    op = None
                    #else:
                    op = MixedOp(build_candidate_ops(self.conv_candidates, in_channels=self.outc, out_channels=self.outc, stride=1,
                                                ops_order='act_weight_bn', affine=self.affine))
                    self.ops[edge_str] = op
        else:
            raise ValueError('search space {:} is not supported'.format(self.search_space))

        self.finalconv1x1 = ConvLayer(self.steps * self.outc, self.outc, 1, 1, False)

        self.edge_keys = sorted(list(self.ops.keys())) # 'sorted by {:}<-{:}'
        self.edge2index = {key:i for i, key in enumerate(self.edge_keys)} # {:}<-{:} : index
        self.nb_edges = len(self.ops)

        #self.cell_arch_parameters = nn.Parameter(torch.Tensor(self.nb_edges, self.n_choice))
        self.cell_arch_parameters = nn.Parameter(1e-3 * torch.randn(self.nb_edges, self.n_choice))
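Not part of the original cell: a minimal, self-contained sketch of the same edge-keyed nn.ModuleDict pattern, in which ops are stored under '{i}<-{j}' keys and a parallel (nb_edges, n_choice) parameter is indexed through edge2index. ToyMixedOp and ToyCell below are illustrative stand-ins, not the real MixedOp or cell.

# Standalone illustration of the edge-keyed ModuleDict + parallel
# architecture-parameter pattern (assumed simplification, not the original).
import torch
import torch.nn as nn

class ToyMixedOp(nn.Module):
    def __init__(self, channels, n_choice):
        super().__init__()
        self.candidates = nn.ModuleList(
            nn.Conv2d(channels, channels, 3, padding=1) for _ in range(n_choice))

    def forward(self, x, weights):
        # weighted sum over candidate ops, DARTS-style
        return sum(w * op(x) for w, op in zip(weights, self.candidates))

class ToyCell(nn.Module):
    def __init__(self, channels=8, total_nodes=4, n_choice=3):
        super().__init__()
        self.total_nodes = total_nodes
        self.ops = nn.ModuleDict()
        for i in range(2, total_nodes):
            for j in range(i):
                self.ops['{:}<-{:}'.format(i, j)] = ToyMixedOp(channels, n_choice)
        self.edge2index = {key: k for k, key in enumerate(sorted(self.ops.keys()))}
        self.arch_parameters = nn.Parameter(1e-3 * torch.randn(len(self.ops), n_choice))

    def forward(self, s0, s1):
        states = [s0, s1]
        for i in range(2, self.total_nodes):
            edge_outputs = []
            for j, h in enumerate(states):
                key = '{:}<-{:}'.format(i, j)
                w = torch.softmax(self.arch_parameters[self.edge2index[key]], dim=-1)
                edge_outputs.append(self.ops[key](h, w))
            states.append(sum(edge_outputs))
        return torch.cat(states[2:], dim=1)

cell = ToyCell()
out = cell(torch.randn(1, 8, 16, 16), torch.randn(1, 8, 16, 16))  # shape (1, 16, 16, 16)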
Example #18
num_epochs = 1200
batch_size = 100

# load all data
data = load_data()
# normalization(data)
# define train dataset and a data loader
train_data, test_data = split_data(data)

# Normalize data using z-score method
normalization(train_data)
normalization(test_data)

data_tensor = torch.Tensor(train_data.values)

input_hidden_layers = nn.ModuleDict()
hidden_hidden_layers = nn.ModuleDict()
hidden_output_layers = nn.ModuleDict()
net = Net(input_size, num_classes, input_hidden_layers, hidden_hidden_layers,
          hidden_output_layers)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()  # nn.CrossEntropyLoss() computes softmax internally
addNeuron(net)

# train the model by batch
previous_loss = float('inf')

# The time period is 15 + P*N (refer to: "A Cascade network algorithm employing Progressive RPROP"), where N is
# the number of currently installed neurons, and P is a parameter set prior to training.
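addNeuron itself is not shown in this snippet. The following is only a hedged sketch of how a cascade step could register one new hidden unit in the three string-keyed ModuleDicts created above; the real addNeuron may differ.

# Hypothetical helper (assumption, not the original addNeuron): grow the
# network by one hidden unit, keyed by its index in the ModuleDicts.
import torch.nn as nn

def add_neuron_sketch(input_hidden, hidden_hidden, hidden_output,
                      input_size, num_classes):
    idx = str(len(input_hidden))                      # index of the new hidden unit
    input_hidden[idx] = nn.Linear(input_size, 1)      # raw inputs -> new unit
    if int(idx) > 0:                                  # earlier hidden units -> new unit
        hidden_hidden[idx] = nn.Linear(int(idx), 1)
    hidden_output[idx] = nn.Linear(1, num_classes, bias=False)  # new unit -> outputs

# e.g. add_neuron_sketch(input_hidden_layers, hidden_hidden_layers,
#                        hidden_output_layers, input_size, num_classes)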
Example #19
File: task.py  Project: kaikun213/fonduer
def create_task(
    task_names: Union[str, List[str]],
    n_arities: Union[int, List[int]],
    n_features: int,
    n_classes: Union[int, List[int]],
    emb_layer: Optional[EmbeddingModule],
    model: str = "LSTM",
    mode: str = "MTL",
) -> List[EmmentalTask]:
    """Create task from relation(s).

    :param task_names: Relation name(s). If str, only one relation; if List[str],
        multiple relations.
    :type task_names: str, List[str]
    :param n_arities: The arity of each relation.
    :type n_arities: int, List[int]
    :param n_features: The multimodal feature set size.
    :type n_features: int
    :param n_classes: Number of classes for each task (only classification
        tasks are supported for now).
    :type n_classes: int, List[int]
    :param emb_layer: The embedding layer for LSTM. No need for LogisticRegression
        model.
    :type emb_layer: EmbeddingModule
    :param model: Model name (available models: "LSTM", "LogisticRegression"),
        defaults to "LSTM".
    :type model: str
    :param mode: Learning mode (available modes: "STL", "MTL"),
        defaults to "MTL".
    :type mode: str

    """

    if model not in ["LSTM", "LogisticRegression"]:
        raise ValueError(
            f"Unrecognized model {model}. Only support {['LSTM', 'LogisticRegression']}"
        )

    if mode not in ["STL", "MTL"]:
        raise ValueError(
            f"Unrecognized mode {mode}. Only support {['STL', 'MTL']}")

    config = get_config()["learning"][model]
    logger.info(f"{model} model config: {config}")

    if not isinstance(task_names, list):
        task_names = [task_names]
    if not isinstance(n_arities, list):
        n_arities = [n_arities]
    if not isinstance(n_classes, list):
        n_classes = [n_classes]

    tasks = []

    for task_name, n_arity, n_class in zip(task_names, n_arities, n_classes):
        if mode == "MTL":
            feature_module_name = "shared_feature"
        else:
            feature_module_name = f"{task_name}_feature"

        if model == "LSTM":
            module_pool = nn.ModuleDict({
                "emb":
                emb_layer,
                feature_module_name:
                SparseLinear(n_features + 1,
                             config["hidden_dim"],
                             bias=config["bias"]),
            })
            for i in range(n_arity):
                module_pool.update({
                    f"{task_name}_lstm{i}":
                    RNN(
                        num_classes=0,
                        emb_size=emb_layer.dim,
                        lstm_hidden=config["hidden_dim"],
                        attention=config["attention"],
                        dropout=config["dropout"],
                        bidirectional=config["bidirectional"],
                    )
                })
            module_pool.update({
                f"{task_name}_pred_head":
                ConcatLinear(
                    [f"{task_name}_lstm{i}"
                     for i in range(n_arity)] + [feature_module_name],
                    config["hidden_dim"] * (2 * n_arity + 1)
                    if config["bidirectional"] else config["hidden_dim"] *
                    (n_arity + 1),
                    n_class,
                )
            })

            task_flow = []
            task_flow += [{
                "name": f"{task_name}_emb{i}",
                "module": "emb",
                "inputs": [("_input_", f"m{i}")],
            } for i in range(n_arity)]
            task_flow += [{
                "name":
                f"{task_name}_lstm{i}",
                "module":
                f"{task_name}_lstm{i}",
                "inputs": [(f"{task_name}_emb{i}", 0),
                           ("_input_", f"m{i}_mask")],
            } for i in range(n_arity)]
            task_flow += [{
                "name":
                feature_module_name,
                "module":
                feature_module_name,
                "inputs": [
                    ("_input_", "feature_index"),
                    ("_input_", "feature_weight"),
                ],
            }]
            task_flow += [{
                "name": f"{task_name}_pred_head",
                "module": f"{task_name}_pred_head",
                "inputs": None,
            }]
        elif model == "LogisticRegression":
            module_pool = nn.ModuleDict({
                feature_module_name:
                SparseLinear(n_features + 1,
                             config["hidden_dim"],
                             bias=config["bias"]),
                f"{task_name}_pred_head":
                ConcatLinear([feature_module_name], config["hidden_dim"],
                             n_class),
            })

            task_flow = [
                {
                    "name":
                    feature_module_name,
                    "module":
                    feature_module_name,
                    "inputs": [
                        ("_input_", "feature_index"),
                        ("_input_", "feature_weight"),
                    ],
                },
                {
                    "name": f"{task_name}_pred_head",
                    "module": f"{task_name}_pred_head",
                    "inputs": None,
                },
            ]
        else:
            raise ValueError(f"Unrecognized model {model}.")

        tasks.append(
            EmmentalTask(
                name=task_name,
                module_pool=module_pool,
                task_flow=task_flow,
                loss_func=partial(loss, f"{task_name}_pred_head"),
                output_func=partial(output, f"{task_name}_pred_head"),
                scorer=Scorer(
                    metrics=["accuracy", "precision", "recall", "f1"]),
            ))

    return tasks
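A hedged usage sketch: the relation name, arity, and feature count below are made-up values, and emb_layer is assumed to be the EmbeddingModule built earlier in the pipeline.

# Illustrative call only (hypothetical relation name and sizes).
tasks = create_task(
    task_names="org_headquarters",
    n_arities=2,
    n_features=10000,
    n_classes=2,
    emb_layer=emb_layer,
    model="LSTM",
    mode="MTL",
)
# the returned EmmentalTask objects can then be registered with an Emmental model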
Example #20
    def __init__(
        self,
        config_trend=None,
        config_season=None,
        config_covar=None,
        config_regressors=None,
        config_events=None,
        config_holidays=None,
        n_forecasts=1,
        n_lags=0,
        num_hidden_layers=0,
        d_hidden=None,
    ):
        """
        Args:
            config_trend (configure.Trend):
            config_season (configure.Season):
            config_covar (OrderedDict):
            config_regressors (OrderedDict): Configs of regressors with mode and index.
            config_events (OrderedDict):
            config_holidays (OrderedDict):
            n_forecasts (int): number of steps to forecast. Aka number of model outputs.
            n_lags (int): number of previous steps of time series used as input. Aka AR-order.
                0 (default): no auto-regression
            num_hidden_layers (int): number of hidden layers (for AR-Net)
                0 (default): no hidden layers, corresponds to classic Auto-Regression
            d_hidden (int): dimensionality of hidden layers  (for AR-Net). ignored if no hidden layers.
                None (default): sets to n_lags + n_forecasts
        """
        super(TimeNet, self).__init__()
        # General
        self.n_forecasts = n_forecasts

        # Bias
        self.bias = new_param(dims=[1])

        # Metrics live
        self.metrics_live = {}

        # Trend
        self.config_trend = config_trend
        if self.config_trend.growth in ["linear", "discontinuous"]:
            self.segmentwise_trend = self.config_trend.trend_reg == 0
            self.trend_k0 = new_param(dims=[1])
            if self.config_trend.n_changepoints > 0:
                if self.config_trend.changepoints is None:
                    # create equidistant changepoint times, including zero.
                    linear_t = np.arange(self.config_trend.n_changepoints +
                                         1).astype(float)
                    linear_t = linear_t / (self.config_trend.n_changepoints +
                                           1)
                    self.config_trend.changepoints = self.config_trend.changepoints_range * linear_t
                else:
                    self.config_trend.changepoints = np.insert(
                        self.config_trend.changepoints, 0, 0.0)
                self.trend_changepoints_t = torch.tensor(
                    self.config_trend.changepoints,
                    requires_grad=False,
                    dtype=torch.float)
                self.trend_deltas = new_param(dims=[
                    self.config_trend.n_changepoints + 1
                ])  # including first segment
                if self.config_trend.growth == "discontinuous":
                    self.trend_m = new_param(dims=[
                        self.config_trend.n_changepoints + 1
                    ])  # including first segment

        # Seasonalities
        self.config_season = config_season
        self.season_dims = season_config_to_model_dims(self.config_season)
        if self.season_dims is not None:
            if self.config_season.mode == "multiplicative" and self.config_trend is None:
                log.error("Multiplicative seasonality requires trend.")
                raise ValueError
            if self.config_season.mode not in ["additive", "multiplicative"]:
                log.error(
                    "Seasonality Mode {} not implemented. Defaulting to 'additive'."
                    .format(self.config_season.mode))
                self.config_season.mode = "additive"
            self.season_params = nn.ParameterDict({
                name: new_param(dims=[dim])
                for name, dim in self.season_dims.items()
            })
            # self.season_params_vec = torch.cat([self.season_params[name] for name in self.season_params.keys()])

        # Events
        self.config_events = config_events
        self.config_holidays = config_holidays
        self.events_dims = events_config_to_model_dims(self.config_events,
                                                       self.config_holidays)
        if self.events_dims is not None:
            n_additive_event_params = 0
            n_multiplicative_event_params = 0
            for event, configs in self.events_dims.items():
                if configs["mode"] not in ["additive", "multiplicative"]:
                    log.error(
                        "Event Mode {} not implemented. Defaulting to 'additive'."
                        .format(configs["mode"]))
                    self.events_dims[event]["mode"] = "additive"
                if configs["mode"] == "additive":
                    n_additive_event_params += len(configs["event_indices"])
                elif configs["mode"] == "multiplicative":
                    if self.config_trend is None:
                        log.error("Multiplicative events require trend.")
                        raise ValueError
                    n_multiplicative_event_params += len(
                        configs["event_indices"])
            self.event_params = nn.ParameterDict({
                "additive":
                new_param(dims=[n_additive_event_params]),
                "multiplicative":
                new_param(dims=[n_multiplicative_event_params]),
            })
        else:
            self.config_events = None
            self.config_holidays = None

        # Autoregression
        self.n_lags = n_lags
        self.num_hidden_layers = num_hidden_layers
        self.d_hidden = n_lags + n_forecasts if d_hidden is None else d_hidden
        if self.n_lags > 0:
            self.ar_net = nn.ModuleList()
            d_inputs = self.n_lags
            for i in range(self.num_hidden_layers):
                self.ar_net.append(
                    nn.Linear(d_inputs, self.d_hidden, bias=True))
                d_inputs = self.d_hidden
            self.ar_net.append(
                nn.Linear(d_inputs, self.n_forecasts, bias=False))
            for lay in self.ar_net:
                nn.init.kaiming_normal_(lay.weight, mode="fan_in")

        # Covariates
        self.config_covar = config_covar
        if self.config_covar is not None:
            assert self.n_lags > 0
            self.covar_nets = nn.ModuleDict({})
            for covar in self.config_covar.keys():
                covar_net = nn.ModuleList()
                d_inputs = self.n_lags
                if self.config_covar[covar].as_scalar:
                    d_inputs = 1
                for i in range(self.num_hidden_layers):
                    covar_net.append(
                        nn.Linear(d_inputs, self.d_hidden, bias=True))
                    d_inputs = self.d_hidden
                covar_net.append(
                    nn.Linear(d_inputs, self.n_forecasts, bias=False))
                for lay in covar_net:
                    nn.init.kaiming_normal_(lay.weight, mode="fan_in")
                self.covar_nets[covar] = covar_net

        ## Regressors
        self.config_regressors = config_regressors
        self.regressors_dims = regressors_config_to_model_dims(
            config_regressors)
        if self.regressors_dims is not None:
            n_additive_regressor_params = 0
            n_multiplicative_regressor_params = 0
            for name, configs in self.regressors_dims.items():
                if configs["mode"] not in ["additive", "multiplicative"]:
                    log.error(
                        "Regressors mode {} not implemented. Defaulting to 'additive'."
                        .format(configs["mode"]))
                    self.regressors_dims[name]["mode"] = "additive"
                if configs["mode"] == "additive":
                    n_additive_regressor_params += 1
                elif configs["mode"] == "multiplicative":
                    if self.config_trend is None:
                        log.error("Multiplicative regressors require trend.")
                        raise ValueError
                    n_multiplicative_regressor_params += 1

            self.regressor_params = nn.ParameterDict({
                "additive":
                new_param(dims=[n_additive_regressor_params]),
                "multiplicative":
                new_param(dims=[n_multiplicative_regressor_params]),
            })
        else:
            self.config_regressors = None
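The forward pass is not shown above; the AR and covariate stacks are plain nn.ModuleList objects. Below is a hedged sketch of how such a stack is commonly applied; the ReLU between hidden layers is an assumption, not taken from the original model.

import torch
import torch.nn as nn
import torch.nn.functional as F

def run_ar_stack(ar_net: nn.ModuleList, lags: torch.Tensor) -> torch.Tensor:
    # apply each Linear in order; hidden layers get a nonlinearity (assumed),
    # the final projection to n_forecasts stays linear
    x = lags
    for i, layer in enumerate(ar_net):
        x = layer(x)
        if i < len(ar_net) - 1:
            x = F.relu(x)
    return x

# toy usage: two hidden layers, n_lags=12, n_forecasts=3, d_hidden=15
toy_net = nn.ModuleList(
    [nn.Linear(12, 15), nn.Linear(15, 15), nn.Linear(15, 3, bias=False)])
print(run_ar_stack(toy_net, torch.randn(4, 12)).shape)  # torch.Size([4, 3])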
Example #21
    def __init__(self, classes: Sequence[str], n_leads: int,
                 config: dict) -> None:
        """ finished, checked,

        Parameters:
        -----------
        classes: sequence of str,
            names of the classes
        n_leads: int,
            number of input leads
        config: dict,
            other hyper-parameters, including kernel sizes, etc.
            ref. the corresponding config file
        """
        super().__init__()
        self.classes = list(classes)
        self.n_classes = len(classes)
        self.__out_channels = len(classes)
        self.__in_channels = n_leads
        self.config = ED(deepcopy(config))
        if self.__DEBUG__:
            print(
                f"configuration of {self.__name__} is as follows\n{dict_to_str(self.config)}"
            )
            __debug_seq_len = 5000

        # TODO: an init batch normalization?
        if self.config.init_batch_norm:
            self.init_bn = nn.BatchNorm1d(
                num_features=self.__in_channels,
                eps=1e-5,  # default val
                momentum=0.1,  # default val
            )

        self.init_conv = TripleConv(
            in_channels=self.__in_channels,
            out_channels=self.config.init_num_filters,
            filter_lengths=self.config.init_filter_length,
            subsample_lengths=1,
            groups=self.config.groups,
            dropouts=self.config.init_dropouts,
            batch_norm=self.config.batch_norm,
            activation=self.config.activation,
            kw_activation=self.config.kw_activation,
            kernel_initializer=self.config.kernel_initializer,
            kw_initializer=self.config.kw_initializer,
        )
        if self.__DEBUG__:
            __debug_output_shape = self.init_conv.compute_output_shape(
                __debug_seq_len)
            print(
                f"given seq_len = {__debug_seq_len}, init_conv output shape = {__debug_output_shape}"
            )
            _, _, __debug_seq_len = __debug_output_shape

        self.down_blocks = nn.ModuleDict()
        in_channels = self.config.init_num_filters
        for idx in range(self.config.down_up_block_num - 1):
            self.down_blocks[f"down_{idx}"] = \
                DownTripleConv(
                    down_scale=self.config.down_scales[idx],
                    in_channels=in_channels,
                    out_channels=self.config.down_num_filters[idx],
                    filter_lengths=self.config.down_filter_lengths[idx],
                    groups=self.config.groups,
                    dropouts=self.config.down_dropouts[idx],
                    mode=self.config.down_mode,
                    **(self.config.down_block)
                )
            in_channels = self.config.down_num_filters[idx][-1]
            if self.__DEBUG__:
                __debug_output_shape = self.down_blocks[
                    f"down_{idx}"].compute_output_shape(__debug_seq_len)
                print(
                    f"given seq_len = {__debug_seq_len}, down_{idx} output shape = {__debug_output_shape}"
                )
                _, _, __debug_seq_len = __debug_output_shape

        self.bottom_block = DownBranchedDoubleConv(
            down_scale=self.config.down_scales[-1],
            in_channels=in_channels,
            out_channels=self.config.bottom_num_filters,
            filter_lengths=self.config.bottom_filter_lengths,
            dilations=self.config.bottom_dilations,
            groups=self.config.groups,
            dropouts=self.config.bottom_dropouts,
            mode=self.config.down_mode,
            **(self.config.down_block))
        if self.__DEBUG__:
            __debug_output_shape = self.bottom_block.compute_output_shape(
                __debug_seq_len)
            print(
                f"given seq_len = {__debug_seq_len}, bottom_block output shape = {__debug_output_shape}"
            )
            _, _, __debug_seq_len = __debug_output_shape

        self.up_blocks = nn.ModuleDict()
        # in_channels = sum([branch[-1] for branch in self.config.bottom_num_filters])
        in_channels = self.bottom_block.compute_output_shape(None, None)[1]
        for idx in range(self.config.down_up_block_num):
            self.up_blocks[f"up_{idx}"] = \
                UpTripleConv(
                    up_scale=self.config.up_scales[idx],
                    in_channels=in_channels,
                    out_channels=self.config.up_num_filters[idx],
                    filter_lengths=self.config.up_conv_filter_lengths[idx],
                    deconv_filter_length=self.config.up_deconv_filter_lengths[idx],
                    groups=self.config.groups,
                    mode=self.config.up_mode,
                    dropouts=self.config.up_dropouts[idx],
                    **(self.config.up_block)
                )
            in_channels = self.config.up_num_filters[idx][-1]
            if self.__DEBUG__:
                __debug_output_shape = self.up_blocks[
                    f"up_{idx}"].compute_output_shape(__debug_seq_len)
                print(
                    f"given seq_len = {__debug_seq_len}, up_{idx} output shape = {__debug_output_shape}"
                )
                _, _, __debug_seq_len = __debug_output_shape

        self.out_conv = Conv_Bn_Activation(
            in_channels=self.config.up_num_filters[-1][-1],
            out_channels=self.__out_channels,
            kernel_size=self.config.out_filter_length,
            stride=1,
            groups=self.config.groups,
            batch_norm=self.config.batch_norm,
            activation=self.config.activation,
            kw_activation=self.config.kw_activation,
            kernel_initializer=self.config.kernel_initializer,
            kw_initializer=self.config.kw_initializer,
        )
        if self.__DEBUG__:
            __debug_output_shape = self.out_conv.compute_output_shape(
                __debug_seq_len)
            print(
                f"given seq_len = {__debug_seq_len}, out_conv output shape = {__debug_output_shape}"
            )

        # for inference
        # if background counted in `classes`, use softmax
        # otherwise use sigmoid
        self.softmax = nn.Softmax(-1)
        self.sigmoid = nn.Sigmoid()
Example #22
File: classifier.py  Project: georgepar/slp
    def __init__(
        self,
        modality_feature_sizes,
        num_classes,
        num_layers=2,
        hidden_size=100,
        num_heads=4,
        max_length=512,
        inner_size=400,
        dropout=0.1,
        nystrom=True,
        num_landmarks=32,
        kernel_size=33,
        prenorm=True,
        scalenorm=True,
        multi_modal_drop="mmdrop",
        p_mmdrop=0.5,
        p_drop_modalities=None,
    ):
        super(TransformerLateFusionClassifier, self).__init__()
        self.modalities = modality_feature_sizes.keys()
        self.modality_encoders = nn.ModuleDict(
            {
                m: TransformerSequenceEncoder(
                    modality_feature_sizes[m],
                    feature_normalization=True if m == "audio" else False,
                    num_layers=num_layers,
                    hidden_size=hidden_size,
                    num_heads=num_heads,
                    max_length=max_length,
                    inner_size=inner_size,
                    dropout=dropout,
                    nystrom=nystrom,
                    num_landmarks=num_landmarks,
                    kernel_size=kernel_size,
                    prenorm=prenorm,
                    scalenorm=scalenorm,
                )
                for m in self.modalities
            }
        )
        self.modality_drop = None
        self.mmdrop = None
        if multi_modal_drop == "mmdrop_hard":
            self.mmdrop = MultimodalDropout(
                p=p_mmdrop,
                n_modalities=len(self.modalities),
                p_mod=p_drop_modalities,
                mode="hard",
            )
        elif multi_modal_drop == "mmdrop_soft":
            self.mmdrop = MultimodalDropout(
                p=p_mmdrop,
                n_modalities=len(self.modalities),
                p_mod=p_drop_modalities,
                mode="soft",
            )
        elif multi_modal_drop == "dropout":
            self.modality_drop = nn.Dropout(p_mmdrop)
        elif multi_modal_drop == "both":
            self.mmdrop = MultimodalDropout(
                p=p_mmdrop,
                n_modalities=len(self.modalities),
                p_mod=p_drop_modalities,
                mode="hard",
            )
            self.modality_drop = nn.Dropout(p_mmdrop)
        elif multi_modal_drop == "none":
            pass
        else:
            raise ValueError(
                "Invalid multi_modal_drop value given. Please check your config file."
            )

        self.out_size = sum([e.out_size for e in self.modality_encoders.values()])
        self.clf = nn.Sequential(
            nn.Linear(self.out_size, self.out_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(self.out_size, num_classes),
        )
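The classifier's forward is not included above. A hedged, self-contained sketch of the late-fusion pattern this layout implies, with each per-modality encoder stubbed by a Linear layer and fusion done by plain concatenation:

import torch
import torch.nn as nn

class ToyLateFusion(nn.Module):
    def __init__(self, modality_feature_sizes, hidden_size=8, num_classes=3):
        super().__init__()
        self.encoders = nn.ModuleDict(
            {m: nn.Linear(d, hidden_size) for m, d in modality_feature_sizes.items()})
        self.clf = nn.Linear(hidden_size * len(modality_feature_sizes), num_classes)

    def forward(self, inputs):  # inputs: dict of tensors keyed by modality name
        encoded = [self.encoders[m](inputs[m]) for m in self.encoders]
        return self.clf(torch.cat(encoded, dim=-1))

sizes = {"text": 300, "audio": 74, "visual": 35}
model = ToyLateFusion(sizes)
batch = {m: torch.randn(4, d) for m, d in sizes.items()}
print(model(batch).shape)  # torch.Size([4, 3])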
Example #23
    def __init__(self, params, pembeds, sizes=None, maps=None, lab2ign=None):
        super(BaseNet, self).__init__()

        self.edg = ['MM', 'SS', 'ME', 'MS', 'ES', 'EE']

        self.dims = {}
        for k in self.edg:
            self.dims[k] = 4 * params['lstm_dim']
 
        self.device = torch.device("cuda:{}".format(params['gpu']) if params['gpu'] != -1 else "cpu")

        self.encoder = Encoder(input_size=params['word_dim'],
                               rnn_size=params['out_dim'],
                               num_layers=1,
                               bidirectional=True,
                               dropout=0.0)

        self.word_embed = EmbedLayer(num_embeddings=sizes['word_size'],
                                     embedding_dim=params['word_dim'],
                                     dropout=params['drop_i'],
                                     ignore=None,
                                     freeze=params['freeze_words'],
                                     pretrained=pembeds,
                                     mapping=maps['word2idx'])

        if params['dist']:
            self.dims['MM'] += params['dist_dim']
            self.dims['SS'] += params['dist_dim']
            self.dist_embed = EmbedLayer(num_embeddings=sizes['dist_size'] + 1,
                                         embedding_dim=params['dist_dim'],
                                         dropout=0.0,
                                         ignore=sizes['dist_size'],
                                         freeze=False,
                                         pretrained=None,
                                         mapping=None)

        if params['context']:
            self.dims['MM'] += (2 * params['lstm_dim'])
            self.attention = Dot_Attention(input_size=2 * params['lstm_dim'],
                                           device=self.device,
                                           scale=False)

        if params['types']:
            for k in self.edg:
                self.dims[k] += (2 * params['type_dim'])

            self.type_embed = EmbedLayer(num_embeddings=3,
                                         embedding_dim=params['type_dim'],
                                         dropout=0.0,
                                         freeze=False,
                                         pretrained=None,
                                         mapping=None)

        self.reduce = nn.ModuleDict()
        for k in self.edg:
            if k != 'EE':
                self.reduce.update({k: nn.Linear(self.dims[k], params['out_dim'], bias=False)})
            elif (('EE' in params['edges']) or ('FULL' in params['edges'])) and (k == 'EE'):
                self.ee = True
                self.reduce.update({k: nn.Linear(self.dims[k], params['out_dim'], bias=False)})
            else:
                self.ee = False

        if params['walks_iter'] and params['walks_iter'] > 0:
            self.walk = WalkLayer(input_size=params['out_dim'],
                                  iters=params['walks_iter'],
                                  beta=params['beta'],
                                  device=self.device)

        self.classifier = Classifier(in_size=params['out_dim'],
                                     out_size=sizes['rel_size'],
                                     dropout=params['drop_o'])
        self.loss = nn.CrossEntropyLoss()

        # hyper-parameters for tuning
        self.beta = params['beta']
        self.dist_dim = params['dist_dim']
        self.type_dim = params['type_dim']
        self.drop_i = params['drop_i']
        self.drop_o = params['drop_o']
        self.gradc = params['gc']
        self.learn = params['lr']
        self.reg = params['reg']
        self.out_dim = params['out_dim']

        # other parameters
        self.mappings = {'word': maps['word2idx'], 'type': maps['type2idx'], 'dist': maps['dist2idx']}
        self.inv_mappings = {'word': maps['idx2word'], 'type': maps['idx2type'], 'dist': maps['idx2dist']}
        self.word_dim = params['word_dim']
        self.lstm_dim = params['lstm_dim']
        self.walks_iter = params['walks_iter']
        self.rel_size = sizes['rel_size']
        self.types = params['types']
        self.ignore_label = lab2ign
        self.context = params['context']
        self.dist = params['dist']
Example #24
    def __init__(self,
                 model_cfg,
                 block='residual',
                 input_size=(1, 256, 256),
                 classes=None,
                 last_act="linear",
                 conv_transpose=False,
                 bn=True,
                 architecture=None,
                 big_drop=0.,
                 small_drop=0.,
                 sddrop=0.,
                 se_ratio=0.0,
                 input_format=None,
                 output_format=None,
                 multi_scale=False,
                 multi_input=False):
        # Parse the network's architecture
        if architecture is None:
            architecture = {
                "first": 32,
                "enc": {
                    "width": [16, 32, 48, 96],
                    "repeat": [2, 3, 3, 4]
                },
                "dec": {
                    "width": [48, 32, 32],
                    "repeat": [2, 2, 1]
                }
            }
        arch = architecture
        if not "dilation" in arch["enc"]:
            arch["enc"]["dilation"] = [1] * len(arch["enc"]["repeat"])
        assert len(
            {"first", "enc", "dec"} -
            {*list(arch.keys())}) == 0, "Missing keys: Need enc, dec, first"
        assert len({"repeat", "width"} - {*list(arch["enc"].keys())}
                   ) == 0, "Missing keys enc: Need width, repeat"
        assert len({"repeat", "width"} - {*list(arch["dec"].keys())}
                   ) == 0, "Missing keys dec: Need width, repeat"
        assert len(arch["enc"]["repeat"]) == len(
            arch["enc"]["width"]), "Mismatched dimensions"
        assert len(arch["enc"]["repeat"]) == len(
            arch["enc"]["dilation"]), "Mismatched dimensions"
        assert len(arch["dec"]["repeat"]) == len(
            arch["dec"]["width"]), "Mismatched dimensions"
        self.arch = arch
        arch["width"] = arch["enc"]["width"] + arch["dec"]["width"]
        arch_enc_len = len(arch["enc"]["width"])
        arch_dec_len = len(arch["dec"]["width"])

        # Construct Super params (input/output-format, tops etc.)
        super().__init__(model_cfg=model_cfg,
                         classes=classes,
                         last_act=last_act,
                         output_format=output_format,
                         input_format=input_format,
                         input_size=input_size,
                         repeat_outputs=arch_dec_len +
                         1 if multi_scale else None)

        self.classes = classes
        self.n_classes = len(classes) + 1
        self.conv_transpose = conv_transpose
        self.multi_scale = multi_scale
        self.multi_input = multi_input

        # Generate Basic building block & Bigger block
        CBA = self.CBA

        all_blocks = get_all_blocks()
        if type(block) is not list:
            block = [block, block]

        blocks = {}
        for bl, name in zip(block, ["enc", "dec"]):
            if bl not in all_blocks:
                raise ValueError("Block " + bl +
                                 " is not a valid block option")
            blocks[name] = all_blocks[bl]

        # Encoder
        bw = first_bw = arch["first"]

        self.input_process = {}
        for key, in_size in zip(self.input_format, self.input_format_sizes):
            self.input_process[key] = CBA(in_size, bw, 3, bn=bn, act=True)
        self.input_process = nn.ModuleDict(self.input_process)

        def get_encoder(wfuse=False):
            prev_bw = arch["first"]
            skips_bw = []
            encoder = []
            fusions = []
            for i, (repeat_block, dilation) in enumerate(
                    zip(self.arch["enc"]["repeat"],
                        self.arch["enc"]["dilation"])):
                is_last = (i + 1 == arch_enc_len)
                if wfuse:
                    fusions.append(
                        FusionModule(
                            model_cfg,
                            in_width=prev_bw,
                            Block=CBA,
                            n_inputs=len(self.input_format) +
                            (0 if i == 0 else 1),  # Own new branch
                            multi_inputs=None if not multi_input or i == 0 else
                            [first_bw] * len(self.input_format),
                        ))
                new_bw = arch["width"][i]
                for j in range(repeat_block):
                    pool = "max" if j + 1 == repeat_block and not is_last else None
                    drop = small_drop if (
                        not is_last) or j + 1 < repeat_block else big_drop

                    encoder.append(
                        ConvBlock(model_cfg,
                                  blocks["enc"],
                                  prev_bw,
                                  new_bw,
                                  3,
                                  bn=bn,
                                  pool=pool,
                                  conv_transpose=self.conv_transpose,
                                  drop=(drop, sddrop),
                                  se_ratio=se_ratio,
                                  dilation=dilation,
                                  first=(i == 0)))
                    prev_bw = new_bw
                skips_bw.append(prev_bw)
            if wfuse:
                fusions.append(
                    nn.Sequential(
                        FusionModule(
                            model_cfg,
                            in_width=prev_bw,
                            n_inputs=len(self.input_format) + 1,
                        ),
                        UpSampling(model_cfg,
                                   in_width=prev_bw,
                                   width=arch["width"][i + 1])))
                return encoder, skips_bw, fusions
            else:
                return encoder

        self.encoders = []
        for _ in self.input_format:  # all the basic encoders
            self.encoders.append(nn.ModuleList(get_encoder()))
        f_enc, skips_bw, fusions = get_encoder(
            wfuse=True)  # the fusion encoder
        self.fusions = nn.ModuleList(fusions)
        self.encoders.append(nn.ModuleList(f_enc))
        self.encoders = nn.ModuleList(self.encoders)

        # Decoders (Classif, Pif, Paf...)
        skips_bw.reverse()  # Reverse for easier indexing

        def get_decoder(prev_bw):
            decoder = []
            tops_prev_bw = []
            tops_upsample = []
            for i, repeat_block in enumerate(self.arch["dec"]["repeat"]):
                if self.multi_scale:
                    tops_prev_bw.append(prev_bw)
                    tops_upsample.append(2**(arch_dec_len - i - 1))
                is_last = (i + 1 == arch_dec_len)
                new_bw = arch["width"][arch_enc_len + i]

                for j in range(repeat_block):
                    pool = "up" if not is_last and j + 1 == repeat_block else None
                    has_skip = j == 0
                    concat_width = skips_bw[i + 1] if has_skip else None

                    elems = [
                        ConvBlock(model_cfg,
                                  blocks["dec"],
                                  prev_bw,
                                  new_bw,
                                  3,
                                  bn=bn,
                                  concatenate=has_skip,
                                  concat_width=concat_width,
                                  conv_transpose=self.conv_transpose,
                                  drop=(small_drop, sddrop),
                                  se_ratio=se_ratio,
                                  last_only=True)
                    ]
                    prev_bw = new_bw
                    if pool is not None:
                        new_bw = arch["width"][arch_enc_len + i +
                                               1]  # search for next one
                        elems.append(
                            UpSampling(model_cfg,
                                       in_width=prev_bw,
                                       width=new_bw))
                        prev_bw = new_bw
                    decoder.append(nn.Sequential(*elems))
            tops_prev_bw.append(prev_bw)
            tops_upsample.append(1)
            return decoder, tops_prev_bw, tops_upsample

        enc_prev_bw = arch["width"][arch_enc_len]
        decoder_types = {
            "mask":
            lambda: get_decoder(enc_prev_bw),
            "keypoints":
            lambda: get_decoder(enc_prev_bw),
            "class":
            lambda: (
                [SubIdentity()] *
                (np.sum(arch["dec"]["repeat"])),  # FIXME: not really efficient
                enc_prev_bw)
        }
        self.decoders = []
        decoder_tops_prev_bw = []
        decoder_tops_upsample = []
        for out_class in self.inference_output_format:
            decoder, bw_dec, up_dec = decoder_types[out_class.name]()
            self.decoders.append(nn.ModuleList(decoder))
            decoder_tops_prev_bw += bw_dec
            decoder_tops_upsample += up_dec
        self.decoders = nn.ModuleList(self.decoders)

        # Tops
        self.tops = self.make_tops(decoder_tops_prev_bw, decoder_tops_upsample)
Example #25
    def __init__(self, projection_names, d_inp=512):
        super(TokenMultiProjectionEncoder, self).__init__()
        self.projections = nn.ModuleDict({
            name: TokenProjectionEncoder(d_inp=d_inp)
            for name in projection_names
        })
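A hedged sketch of the forward such a projection dict usually implies: every named projection is applied to the same input and the outputs are returned keyed by name. TokenProjectionEncoder is stubbed with a Linear layer here.

import torch
import torch.nn as nn

projections = nn.ModuleDict({name: nn.Linear(512, 512) for name in ("ner", "pos")})
tokens = torch.randn(2, 7, 512)
outputs = {name: proj(tokens) for name, proj in projections.items()}
print(sorted(outputs))  # ['ner', 'pos']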
Example #26
    def __init__(self, in_ch, do_task_list, fc=1, fc_nc=64, n=1):
        super(FastNeuralStyleTransfer, self).__init__()
        self.nc_list = [
            32 * n, 64 * n, 128 * n, 128 * n, 128 * n, 128 * n, 128 * n,
            128 * n, 128 * n, 128 * n, 128 * n, 128 * n, 128 * n, 64 * n,
            32 * n
        ]
        self.do_task_list = do_task_list
        task_num = len(do_task_list)

        #         self.film_generator = film_generator(sum(self.nc_list), task_num-1, fc, fc_nc)
        self.film_generator = film_generator(sum(self.nc_list), task_num, fc,
                                             fc_nc)

        # Initial convolution layers
        self.encoder = nn.ModuleDict({
            'conv1':
            ConvLayer(in_ch, 32 * n, kernel_size=9, stride=1),
            'film1':
            film(32 * n, task_num),
            'conv2':
            ConvLayer(32 * n, 64 * n, kernel_size=3, stride=2),
            'film2':
            film(64 * n, task_num),
            'conv3':
            ConvLayer(64 * n, 128 * n, kernel_size=3, stride=2),
            'film3':
            film(128 * n, task_num),
        })

        # Residual layers
        self.res = nn.ModuleDict({
            'res1': ResidualBlock(128 * n, task_num),
            'res2': ResidualBlock(128 * n, task_num),
            'res3': ResidualBlock(128 * n, task_num),
            'res4': ResidualBlock(128 * n, task_num),
            'res5': ResidualBlock(128 * n, task_num),
        })

        # Upsampling Layers
        self.decoder = nn.ModuleDict({
            'deconv1':
            UpsampleConvLayer(128 * n,
                              64 * n,
                              kernel_size=3,
                              stride=1,
                              upsample=2),
            'film4':
            film(64 * n, task_num),
            'deconv2':
            UpsampleConvLayer(64 * n,
                              32 * n,
                              kernel_size=3,
                              stride=1,
                              upsample=2),
            'film5':
            film(32 * n, task_num)
        })

        self.lastconv_dic = nn.ModuleDict({})
        for task in self.do_task_list:
            if task == 'autoencoder':
                self.lastconv_dic[task] = nn.Sequential(
                    ConvLayer(32 * n, 3, kernel_size=9, stride=1), nn.Tanh())
            elif task == 'segment_semantic':
                self.lastconv_dic[task] = nn.Sequential(
                    ConvLayer(32 * n, 17, kernel_size=9, stride=1))

            elif task == 'edge_texture':
                self.lastconv_dic[task] = nn.Sequential(
                    ConvLayer(32 * n, 1, kernel_size=9, stride=1), nn.Tanh())
            elif task == 'edge_occlusion':
                self.lastconv_dic[task] = nn.Sequential(
                    ConvLayer(32 * n, 1, kernel_size=9, stride=1), nn.Tanh())
            elif task == 'normal':
                self.lastconv_dic[task] = nn.Sequential(
                    ConvLayer(32 * n, 3, kernel_size=9, stride=1), nn.Tanh())
            elif task == 'principal_curvature':
                self.lastconv_dic[task] = nn.Sequential(
                    ConvLayer(32 * n, 3, kernel_size=9, stride=1), nn.Tanh())
            elif task == 'keypoints2d':
                self.lastconv_dic[task] = nn.Sequential(
                    ConvLayer(32 * n, 1, kernel_size=9, stride=1), nn.Tanh())
            elif task == 'keypoints3d':
                self.lastconv_dic[task] = nn.Sequential(
                    ConvLayer(32 * n, 1, kernel_size=9, stride=1), nn.Tanh())
            elif task == 'depth_zbuffer':
                self.lastconv_dic[task] = nn.Sequential(
                    ConvLayer(32 * n, 1, kernel_size=9, stride=1), nn.Tanh())
        # Non-linearities
        self.relu = torch.nn.ReLU()

        self._initialize_weights()
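The forward pass is not shown. A hedged sketch of how a task-keyed head dict like lastconv_dic is typically used at the end of it, with the shared trunk stubbed by a single conv:

import torch
import torch.nn as nn

heads = nn.ModuleDict({
    "autoencoder": nn.Conv2d(32, 3, kernel_size=9, padding=4),
    "depth_zbuffer": nn.Conv2d(32, 1, kernel_size=9, padding=4),
})
trunk = nn.Conv2d(3, 32, kernel_size=3, padding=1)  # stand-in for encoder/res/decoder

x = torch.randn(1, 3, 64, 64)
features = trunk(x)
outputs = {task: head(features) for task, head in heads.items()}
print({t: tuple(o.shape) for t, o in outputs.items()})
# {'autoencoder': (1, 3, 64, 64), 'depth_zbuffer': (1, 1, 64, 64)}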
Example #27
    def __init__(
            self,
            device,
            preproc,
            word_emb_size,
            num_latent_relations,
            hidden_size=300,
            recurrent_size=256,
            discrete_relation=True,
            norm_relation=True,
            symmetric_relation=False,
            combine_latent_relations=False,
            score_type="bilinear",
            learnable_embeddings=False,
            question_encoder=("shared-en-emb", ),
            column_encoder=("shared-en-emb", ),
            table_encoder=("shared-en-emb", ),
    ):
        super().__init__()
        self.preproc = preproc
        self.vocab = preproc.vocab
        self.word_emb_size = word_emb_size
        self._device = device
        self.hidden_size = hidden_size
        self.discrete_relation = discrete_relation
        self.norm_relation = norm_relation
        self.num_latent_relations = num_latent_relations
        self.relations2id = preproc.relations2id
        self.recurrent_size = recurrent_size
        self.dropout = 0.0

        score_funcs = {
            "bilinear":
            lambda: energys.Bilinear(
                hidden_size, num_latent_relations, include_id=True),
            "mlp":
            lambda: energys.MLP(hidden_size, num_latent_relations),
        }

        # build modules
        if learnable_embeddings:
            self.en_learnable_words = self.vocab
        else:
            self.en_learnable_words = None
        shared_modules = {
            "shared-en-emb":
            embedders.LookupEmbeddings(
                self._device,
                self.vocab,
                self.preproc.word_emb,
                self.word_emb_size,
                learnable_words=self.en_learnable_words,
            ),
        }

        if self.preproc.use_ch_vocab:
            self.ch_vocab = preproc.ch_vocab
            if learnable_embeddings:
                self.ch_learnable_words = self.ch_vocab
            else:
                self.ch_learnable_words = None
            shared_modules["shared-ch-emb"] = embedders.LookupEmbeddings(
                self._device,
                self.ch_vocab,
                self.preproc.ch_word_emb,
                self.preproc.ch_word_emb.dim,
                learnable_words=self.ch_learnable_words,
            )
            shared_modules["ch-bilstm"] = lstm.BiLSTM(
                input_size=self.preproc.ch_word_emb.dim,
                output_size=self.recurrent_size,
                dropout=self.dropout,
                use_native=False,
                summarize=False,
            )
            shared_modules["ch-bilstm-native"] = lstm.BiLSTM(
                input_size=self.preproc.ch_word_emb.dim,
                output_size=self.recurrent_size,
                dropout=self.dropout,
                use_native=True,
                summarize=False,
            )

        self.question_encoder = self._build_modules(
            question_encoder, shared_modules=shared_modules)
        self.column_encoder = self._build_modules(
            column_encoder, shared_modules=shared_modules)
        self.table_encoder = self._build_modules(table_encoder,
                                                 shared_modules=shared_modules)

        self.combine_latent_relations = combine_latent_relations
        if combine_latent_relations:
            self.string_link = StringLinking(device, preproc)

        self.symmetric_relation = symmetric_relation
        assert self.symmetric_relation
        if self.symmetric_relation:
            relations = ("qc", "qt")
        else:
            relations = ("qc", "cq", "tq", "qt")
        self.relation_score_dic = nn.ModuleDict(
            {k: score_funcs[score_type]()
             for k in relations})

        if discrete_relation:
            self.temperature = 1  # for gumbel

        if not norm_relation:  # then norm q/col/tab
            self.null_q_token = nn.Parameter(torch.zeros([1, hidden_size]))
            self.null_c_token = nn.Parameter(torch.zeros([1, hidden_size]))
            self.null_t_token = nn.Parameter(torch.zeros([1, hidden_size]))
Example #28
    def __init__(self,
                 num_classes,
                 loss,
                 block,
                 layers,
                 last_stride=2,
                 fc_dims=None,
                 attribute_list=None,
                 attr_dims=None,
                 **kwargs):
        self.inplanes = 64
        super(ResNetMid, self).__init__()
        self.loss = loss
        self.feature_dim = 512 * block.expansion

        print("Attribute_list = ", attribute_list)
        print("Attribute_dims = ", attr_dims)
        # backbone network
        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block,
                                       512,
                                       layers[3],
                                       stride=last_stride)

        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        assert fc_dims is not None
        # Remove dropout if it doesn't improve
        self.fc_fusion = self._construct_fc_layer(fc_dims,
                                                  512 * block.expansion * 2)
        self.feature_dim += 512 * block.expansion
        self.attr_dims = attr_dims
        #self.classifier = nn.Linear(self.feature_dim, num_classes)
        # modify the final layer to contain classifiers for person id, attributes
        # attribute_list is expected to contain a dict with key as attribute name and value as array of possible values
        # self.classifiers = nn.ModuleList()
        # self.attributes = [('id', num_classes)]
        # self.classifiers.append(nn.Linear(self.feature_dim, num_classes))
        self.classifiers = nn.ModuleDict()
        self.classifiers["id"] = nn.Linear(self.feature_dim, num_classes)

        if attribute_list is not None:
            for attribute_name, choices in attribute_list.items():
                self.classifiers[attribute_name] = nn.Linear(
                    self.feature_dim, len(choices))
                if self.attr_dims is not None:
                    self.classifiers[attribute_name] = nn.Sequential(*[
                        nn.Linear(self.feature_dim, self.attr_dims),
                        nn.ReLU(),
                        nn.Linear(self.attr_dims, len(choices))
                    ])

        # for name, length in self.attributes:
        #     setattr(self, 'attr_' + name, nn.Linear(self.feature_dim, length))
        # self.test_layer = nn.Linear(self.feature_dim, 10)
        self._init_params()
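A hedged sketch of the multi-head prediction step this classifier dict implies: one shared feature vector, an "id" logit head plus one head per attribute (the attribute list and sizes below are illustrative).

import torch
import torch.nn as nn

feature_dim, num_ids = 128, 751
classifiers = nn.ModuleDict({"id": nn.Linear(feature_dim, num_ids)})
for attribute_name, choices in {"gender": ["male", "female"], "bag": ["yes", "no"]}.items():
    classifiers[attribute_name] = nn.Linear(feature_dim, len(choices))

features = torch.randn(4, feature_dim)  # stand-in for the pooled backbone features
logits = {name: head(features) for name, head in classifiers.items()}
print({k: v.shape[-1] for k, v in logits.items()})  # {'id': 751, 'gender': 2, 'bag': 2}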
Example #29
    def __init__(self, cfg: DictConfig, trainer: Trainer = None):
        # Get global rank and total number of GPU workers for IterableDataset partitioning, if applicable
        # global_rank and local_rank are set by LightningModule in Lightning 1.2.0
        self.world_size = 1
        if trainer is not None:
            self.world_size = trainer.world_size

        super().__init__(cfg=cfg, trainer=trainer)
        self.preprocessor = SpeechEncDecSelfSupervisedModel.from_config_dict(
            self._cfg.preprocessor)
        self.encoder = SpeechEncDecSelfSupervisedModel.from_config_dict(
            self._cfg.encoder)

        self.decoder_losses = None

        if "loss_list" in self._cfg:

            self.decoder_losses = {}
            self.loss_alphas = {}
            self.start_step = {}
            self.output_from_layer = {}
            self.transpose_encoded = {}
            self.targets_from_loss = {}
            # these need to be kept separate from the ModuleDict

            for decoder_loss_name, decoder_loss_cfg in self._cfg.loss_list.items():
                new_decoder_loss = {
                    'decoder':
                    SpeechEncDecSelfSupervisedModel.from_config_dict(
                        decoder_loss_cfg.decoder),
                    'loss':
                    SpeechEncDecSelfSupervisedModel.from_config_dict(
                        decoder_loss_cfg.loss),
                }
                new_decoder_loss = nn.ModuleDict(new_decoder_loss)
                self.decoder_losses[decoder_loss_name] = new_decoder_loss
                self.loss_alphas[decoder_loss_name] = decoder_loss_cfg.get(
                    "loss_alpha", 1.0)
                self.output_from_layer[
                    decoder_loss_name] = decoder_loss_cfg.get(
                        "output_from_layer", None)
                self.targets_from_loss[
                    decoder_loss_name] = decoder_loss_cfg.get(
                        "targets_from_loss", None)
                self.start_step[decoder_loss_name] = decoder_loss_cfg.get(
                    "start_step", 0)
                self.transpose_encoded[
                    decoder_loss_name] = decoder_loss_cfg.get(
                        "transpose_encoded", False)

                if self.output_from_layer[decoder_loss_name] is not None:
                    self.set_access_enabled(access_enabled=True)

            self.decoder_losses = nn.ModuleDict(self.decoder_losses)

        else:
            self.decoder_ssl = SpeechEncDecSelfSupervisedModel.from_config_dict(
                self._cfg.decoder)
            self.loss = SpeechEncDecSelfSupervisedModel.from_config_dict(
                self._cfg.loss)

        self.spec_augmentation = SpeechEncDecSelfSupervisedModel.from_config_dict(
            self._cfg.spec_augment)

        # dropout for features/spectrograms (applied before masking)
        self.dropout_features = (torch.nn.Dropout(self._cfg.dropout_features)
                                 if "dropout_features" in self._cfg else None)

        # dropout for targets (applied before quantization)
        self.dropout_features_q = (torch.nn.Dropout(
            self._cfg.dropout_features_q) if "dropout_features_q" in self._cfg
                                   else None)

        # Feature penalty for preprocessor encodings (for Wav2Vec training)
        if "feature_penalty" in self._cfg:
            self.feat_pen, self.pen_factor = 0.0, self._cfg.feature_penalty
        else:
            self.feat_pen, self.pen_factor = None, None

        if "access" in self._cfg:
            set_access_cfg(self._cfg.access)

        self.apply_masking = True
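The training step is not included above. A hedged sketch of aggregating a weighted total loss over such a ModuleDict of {'decoder', 'loss'} pairs, following the loss_alphas/start_step bookkeeping (decoders, loss modules, and targets are stubbed):

import torch
import torch.nn as nn

decoder_losses = nn.ModuleDict({
    "ctc": nn.ModuleDict({"decoder": nn.Linear(16, 8), "loss": nn.MSELoss()}),
    "mlm": nn.ModuleDict({"decoder": nn.Linear(16, 8), "loss": nn.MSELoss()}),
})
loss_alphas = {"ctc": 1.0, "mlm": 0.3}
start_step = {"ctc": 0, "mlm": 1000}

def total_loss(encoded, targets, global_step):
    total = torch.zeros(())
    for name, dec_loss in decoder_losses.items():
        if global_step < start_step[name]:
            continue  # this auxiliary loss is not active yet
        pred = dec_loss["decoder"](encoded)
        total = total + loss_alphas[name] * dec_loss["loss"](pred, targets)
    return total

print(total_loss(torch.randn(4, 16), torch.randn(4, 8), global_step=0))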
Example #30
    def change_model_spec(self, model_spec, initial=False, verbose=False):

        # Setup a graph structure to simplify our life
        dag = nx.from_numpy_matrix(model_spec.matrix,
                                   create_using=nx.DiGraph())

        node_labels = {}
        for i, op in enumerate(model_spec.ops):
            if op == "input" or op == "output":
                node_labels[i] = op
            else:
                node_labels[i] = "vertex_%d" % i

        dag = nx.relabel_nodes(dag, node_labels)

        # Resolve dependencies in graph
        self.execution_order = self._get_execution_order(dag)

        # Setup output_sizes for operations and assign vertex types
        out_shapes_list = compute_vertex_channels(self.input_channels,
                                                  self.output_channels,
                                                  model_spec.matrix)
        if verbose:
            logging.info('vertex channels %s', str(out_shapes_list))

        if initial:
            # generate the maximum possible channels.
            out_shapes_list = [
                self.input_channels,
            ] + [
                self.output_channels,
            ] * (len(out_shapes_list) - 1)

        out_shapes = {}
        vertex_types = {}

        for t, (shape, op) in enumerate(zip(out_shapes_list, model_spec.ops)):
            out_shapes[node_labels[t]] = shape
            vertex_types[node_labels[t]] = op

        self.dag = dag

        # print('node labels', node_labels)
        # print('out_shapes_list', out_shapes_list)
        # print('out_shapes', out_shapes)
        # print('vertex_types', vertex_types)
        # return node_labels, out_shapes, vertex_types, out_shapes_list

        # Setup the operations
        if initial:
            self.vertex_ops = nn.ModuleDict()
        for output_node, input_nodes in self.execution_order.items():
            if output_node == "output":
                continue
            # Setup all input shapes
            in_shapes = [out_shapes[node] for node in input_nodes]

            # Check if any of the inputs to the vertex comes form input to module
            is_input = [node == "input" for node in input_nodes]
            if initial:
                # Setup the operation
                self.vertex_ops[output_node] = self.vertex_cls(
                    in_shapes, out_shapes[output_node],
                    vertex_types[output_node], is_input, self.args)
            else:
                # get the input_nodes order, by [input, vertex_i]
                input_nodes_id = [
                    0 if x == 'input' else int(x.split('vertex_')[1])
                    for x in input_nodes
                ]
                self.vertex_ops[output_node].change_vertex_type(
                    in_shapes, out_shapes[output_node],
                    vertex_types[output_node], input_nodes_id)

        # Handle skip connections to output
        self.has_skip = self.dag.has_edge("input", "output")
        if self.has_skip:
            # if len(self.execution_order['output']) > 1:
            self.execution_order["output"].remove("input")
            if len(self.execution_order['output']) == 0:
                del self.execution_order['output']
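A hedged sketch of the forward pass this layout enables: walk the resolved execution order, gather each vertex's inputs from already-computed tensors, and apply the op looked up in the vertex_ops ModuleDict. Vertex ops are stubbed with Identity and multiple inputs are simply summed.

import torch
import torch.nn as nn

execution_order = {"vertex_1": ["input"],
                   "vertex_2": ["input", "vertex_1"],
                   "output": ["vertex_1", "vertex_2"]}
vertex_ops = nn.ModuleDict({"vertex_1": nn.Identity(), "vertex_2": nn.Identity()})

def run_dag(x):
    tensors = {"input": x}
    for node, input_nodes in execution_order.items():
        gathered = sum(tensors[n] for n in input_nodes)
        tensors[node] = gathered if node == "output" else vertex_ops[node](gathered)
    return tensors["output"]

print(run_dag(torch.ones(2, 3)))  # tensor of threes, shape (2, 3)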