def __init__(self, **kwargs):
        super(SimpleStochasticPolicy, self).__init__()
        hidden_size = kwargs['linear_layers_size']
        # actor
        self.bn = BatchNorm1d(kwargs['input_dim'])
        self.linears = ModuleList(
            [Linear(kwargs['input_dim'], hidden_size[0])])
        self.linears.extend([
            Linear(hidden_size[i - 1], hidden_size[i])
            for i in range(1, len(hidden_size))
        ])
        self.mu = Linear(hidden_size[-1], kwargs['action_dim'])
        self.log_var = Linear(hidden_size[-1], kwargs['action_dim'])
        # self.log_var = torch.nn.Parameter(torch.zeros(kwargs['action_dim']))

        self.relu = ReLU()
        self.tanh = Tanh()

        self.apply(init_weights)  # xavier uniform init
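
A minimal sketch (not part of the source) of the forward method such a Gaussian policy head typically pairs with, assuming only the attributes registered above:

import torch

def forward(self, x):
    # hedged sketch: normalize observations, run the MLP trunk,
    # then emit a bounded mean and a state-dependent log-variance
    x = self.bn(x)
    for linear in self.linears:
        x = self.relu(linear(x))
    mu = self.tanh(self.mu(x))
    log_var = self.log_var(x)
    std = torch.exp(0.5 * log_var)
    return torch.distributions.Normal(mu, std)
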
Example #2
 def __init__(self,
              device,
              input_size=300,
              layers=(512, 256),
              output_size=3,
              p_dropout=0.5,
              activation='relu',
              continuous=True):
     super(simpleMLP, self).__init__()
     self._device = device
     self._linmaps = ModuleList([])
     last_size = input_size
     for j in layers:
         self._linmaps.append(Linear(last_size, j))
         last_size = j
     self._linmaps.append(Linear(last_size, output_size))
     self._dropout = Dropout(p=p_dropout)
     self._activation = activation
      self._continuous = continuous
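
The constructor stores the activation name, dropout, and the continuous flag but stops before the forward pass. A hedged sketch of how these pieces are usually combined (the log-softmax branch for the discrete case and the tanh fallback are assumptions):

import torch
import torch.nn.functional as F

def forward(self, x):
    for i, linmap in enumerate(self._linmaps):
        x = linmap(x)
        if i < len(self._linmaps) - 1:
            # hidden layers: activation then dropout
            x = F.relu(x) if self._activation == 'relu' else torch.tanh(x)
            x = self._dropout(x)
    if not self._continuous:
        x = F.log_softmax(x, dim=-1)
    return x
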
Example #3
 def __init__(
     self,
     in_feats: int,
     out_feats: int,
     n_steps: int,
     n_etypes: int,
     bias: bool = True,
 ) -> None:
     """Construct a GGNN layer."""
     super().__init__()
     self.in_feats = in_feats
     self.out_feats = out_feats
     self.n_steps = n_steps
     self.n_etypes = n_etypes
     self._linears = ModuleList(
         [Linear(out_feats, out_feats) for _n in range(n_etypes)])
     self._gru = GRUCell(input_size=out_feats,
                         hidden_size=out_feats,
                         bias=bias)
Example #4
    def __init__(self, hidden_channels=128, num_filters=128,
                 num_interactions=6, num_gaussians=50, cutoff=10.0,
                 readout='add', dipole=False, mean=None, std=None,
                 atomref=None):
        super(SchNet, self).__init__()

        assert readout in ['add', 'sum', 'mean']

        self.hidden_channels = hidden_channels
        self.num_filters = num_filters
        self.num_interactions = num_interactions
        self.num_gaussians = num_gaussians
        self.cutoff = cutoff
        self.readout = readout
        self.dipole = dipole
        self.readout = 'add' if self.dipole else self.readout
        self.mean = mean
        self.std = std
        self.scale = None

        atomic_mass = torch.from_numpy(ase.data.atomic_masses)
        self.register_buffer('atomic_mass', atomic_mass)

        self.embedding = Embedding(100, hidden_channels)
        self.distance_expansion = GaussianSmearing(0.0, cutoff, num_gaussians)

        self.interactions = ModuleList()
        for _ in range(num_interactions):
            block = InteractionBlock(hidden_channels, num_gaussians,
                                     num_filters, cutoff)
            self.interactions.append(block)

        self.lin1 = Linear(hidden_channels, hidden_channels // 2)
        self.act = ShiftedSoftplus()
        self.lin2 = Linear(hidden_channels // 2, 1)

        self.register_buffer('initial_atomref', atomref)
        self.atomref = None
        if atomref is not None:
            self.atomref = Embedding(100, 1)
            self.atomref.weight.data.copy_(atomref)

        self.reset_parameters()
Example #5
 def __init__(self, embedding_size=512):
     super().__init__()
     self.main_module = ModuleList()
     
     # conv1 -> 64 x 56 x 56
     self.main_module.append(
         Conv_block(3, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
     )
     # conv2 -> 64 x 28 x 28
     self.main_module.append(Sequential(
         Conv_block(64, 64, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64),
         Depth_Wise(64, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128)
     ))
     
     # conv3 -> 128 x 14 x 14
     self.main_module.append(Sequential(
         Residual(64, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
         Depth_Wise(64, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256)
     ))
     
     #conv4 -> 128 x 7 x 7
     self.main_module.append(Sequential(
         Residual(128, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
         Depth_Wise(128, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512)
     ))
     
     #conv5 -> 128 x 7 x 7
     self.main_module.append(Sequential(
         Residual(128, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
     ))
     
     #conv6 -> 512 x 1 x 1
     self.main_module.append(Sequential(
         Conv_block(128, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0)),
         Linear_block(512, 512, groups=512, kernel=(7,7), stride=(1, 1), padding=(0, 0)),
         Flatten()
     ))
     
     #output layer ->512
     self.main_module.append(Sequential(
         Linear(512, embedding_size, bias=False),
         BatchNorm1d(embedding_size),
     ))
Example #6
 def __init__(self, vocab_size: int, num_layer: int, num_head: int,
              hiddensize: int, feed_back: int, dropout: float, device: str):
     """
     :param vocab_size: translation target vocab num
     :param num_layer: decoder layer num
     :param num_head: multihead num
     :param hiddensize: embedding dimension
     :param feed_back: hidden dimension in feedback
     """
     super(Decoder, self).__init__()
     self.Positional = Positional_Encoding(hiddensize, 512, device)
     self.Embedding = Embedding(vocab_size, hiddensize, padding_idx=0)
     self.Decoder_Layer = ModuleList()
     for i in range(num_layer):
         self.Decoder_Layer.append(
             DecoderLayer(num_head, hiddensize, feed_back, dropout))
     self.feedback = FeedForward(hiddensize, feed_back, dropout=dropout)
     self.d_model = hiddensize
     self.Linear = Linear(hiddensize, vocab_size)
Example #7
    def __init__(self,
                 kernel_size,
                 conv_depth,
                 layer_structure=[1, 2],
                 initial_depth=None,
                 activation=SELU):
        '''Convolution Stack with Residual Structure.
    
    Inputs
    ------
    
    kernel_size    : as in Conv2d
    conv_depth     : output depth
    layer_structure: list of ints. The first int represents the number of convolutions
                     to apply initially. After that, each int represents a number of
                     convolutions to apply before adding the residual from the previous
                     state. The default [1,2] does 1 convolution to output "x" and then 
                     does two convolutions and adds x
    initial_depth  : depth of the first input (defaults to conv_depth)
    activation     : class for activation
    
    '''
        super(ResidualConvStack, self).__init__()

        if initial_depth is None:
            initial_depth = conv_depth

        self.convs = ModuleList([])
        self.convs.append(
            Conv2d(initial_depth,
                   conv_depth,
                   kernel_size,
                   padding=kernel_size // 2))

        for _ in range(sum(layer_structure) - 1):
            self.convs.append(
                Conv2d(conv_depth,
                       conv_depth,
                       kernel_size,
                       padding=kernel_size // 2))

        self.layer_structure = layer_structure
        self.activation = activation()
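
The docstring describes the residual scheme in words; this is a sketch of the forward pass it implies (an assumption, since the source omits it): the first layer_structure[0] convolutions produce the running state x, and each later group is applied and then added back onto x.

def forward(self, x):
    idx = 0
    # initial convolutions produce the first residual state
    for _ in range(self.layer_structure[0]):
        x = self.activation(self.convs[idx](x))
        idx += 1
    # each later group is applied, then the previous state is added back
    for group in self.layer_structure[1:]:
        out = x
        for _ in range(group):
            out = self.activation(self.convs[idx](out))
            idx += 1
        x = x + out
    return x
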
Example #8
def initmodel(cencoder, tencoder, embdim):
    cencoder = copy(cencoder)
    tencoder = copy(tencoder)
    tencoder[BD] = len(tencoder)
    cencoder[BD] = len(cencoder)
    cembedding = Embedding(len(cencoder), embdim)
    tembedding = Embedding(len(tencoder), embdim)
    enc = LSTM(input_size=embdim,
               hidden_size=LSTMDIM,
               num_layers=1,
               bidirectional=True).type(DTYPE)
    ench0 = randn(2, 1, LSTMDIM).type(DTYPE)
    encc0 = randn(2, 1, LSTMDIM).type(DTYPE)

    dec = LSTM(input_size=2 * LSTMDIM + embdim,
               hidden_size=LSTMDIM,
               num_layers=1).type(DTYPE)
    # initial decoder states: (num_layers * num_directions, batch, hidden_size)
    dech0 = randn(1, 1, LSTMDIM).type(DTYPE)
    decc0 = randn(1, 1, LSTMDIM).type(DTYPE)

    pred = Linear(LSTMDIM, len(cencoder)).type(DTYPE)
    sm = LogSoftmax(dim=-1).type(DTYPE)  # softmax over the vocabulary axis

    model = ModuleList([cembedding, tembedding, enc, dec, pred, sm])
    optimizer = Adam(model.parameters(), lr=LEARNINGRATE, betas=BETAS)

    return {
        'model': model,
        'optimizer': optimizer,
        'cencoder': cencoder,
        'tencoder': tencoder,
        'cembedding': cembedding,
        'tembedding': tembedding,
        'enc': enc,
        'ench0': ench0,
        'encc0': encc0,
        'dec': dec,
        'dech0': dech0,
        'decc0': decc0,
        'pred': pred,
        'sm': sm,
        'embdim': embdim
    }
Example #9
    def __init__(self, in_features, n_classes, cutoffs, div_value=4., head_bias=False, get_full_prob=False):
        super(AdaptiveLogSoftmaxWithLoss, self).__init__()

        cutoffs = list(cutoffs)

        if (cutoffs != sorted(cutoffs)) \
                or (min(cutoffs) <= 0) \
                or (max(cutoffs) >= (n_classes - 1)) \
                or (len(set(cutoffs)) != len(cutoffs)) \
                or any([int(c) != c for c in cutoffs]):

            raise ValueError("cutoffs should be a sequence of unique, positive "
                             "integers sorted in an increasing order, where "
                             "each value is between 1 and n_classes-1")

        self.in_features = in_features
        self.n_classes = n_classes
        self.cutoffs = cutoffs + [n_classes]
        self.div_value = div_value
        self.head_bias = head_bias
        self.get_full_prob = get_full_prob

        self.shortlist_size = self.cutoffs[0]
        self.n_clusters = len(self.cutoffs) - 1
        self.head_size = self.shortlist_size + self.n_clusters

        self.head = Linear(self.in_features, self.head_size, bias=self.head_bias)
        self.tail = ModuleList()
        
        self.log_softmax = torch.nn.LogSoftmax(dim=1)

        for i in range(self.n_clusters):

            hsz = int(self.in_features // (self.div_value ** (i + 1)))
            osz = self.cutoffs[i + 1] - self.cutoffs[i]

            projection = Sequential(
                Linear(self.in_features, hsz, bias=False),
                Linear(hsz, osz, bias=False)
            )

            self.tail.append(projection)
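
For concreteness, a usage sketch with hypothetical sizes, showing how the cutoffs split the output space and how div_value shrinks the tail projections built above:

# hypothetical sizes: 10,000 classes, shortlist of 2,000, two tail clusters
asoft = AdaptiveLogSoftmaxWithLoss(in_features=512, n_classes=10000,
                                   cutoffs=[2000, 6000], div_value=4.)
# asoft.head   : Linear(512, 2002) -> 2000 shortlist logits + 2 cluster logits
# asoft.tail[0]: 512 -> 128 -> 4000  (classes 2000..5999), 512 // 4**1 = 128
# asoft.tail[1]: 512 ->  32 -> 4000  (classes 6000..9999), 512 // 4**2 = 32
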
Example #10
    def initLayers(self, params):
        bitwidths, kernel_sizes, nClasses = params
        bitwidths = bitwidths.copy()

        layersPlanes = self.initLayersPlanes()

        # init previous layer
        prevLayer = None

        self.maxpool = nn.MaxPool2d(
            kernel_size=3, stride=2,
            padding=1) if self.dataset == 'imagenet' else lambda x: x
        # create list of layers from layersPlanes
        # supports bitwidth as list of ints, i.e. same bitwidths to all layers
        # supports bitwidth as list of lists, i.e. specific bitwidths to each layer
        layers = ModuleList()
        for i, (layerType, in_planes, out_planes,
                input_size) in enumerate(layersPlanes):
            # build layer
            kernel_sizes_tmp = kernel_sizes
            if layerType == self.createMixedLayer and self.dataset == 'imagenet':
                kernel_sizes_tmp = [7]
                l = layerType(bitwidths, in_planes, out_planes,
                              kernel_sizes_tmp, 2, input_size, prevLayer)
            else:
                l = layerType(bitwidths, in_planes, out_planes,
                              kernel_sizes_tmp, 1, input_size, prevLayer)
            # add layer to layers list
            layers.append(l)
            # remove layer specific bitwidths, in case of different bitwidths to layers
            # if isinstance(bitwidths[0], list):
            #     nMixedOpLayers = 1 if isinstance(l, MixedFilter) \
            #         else sum(1 for _, m in l._modules.items() if isinstance(m, MixedFilter))
            #     del bitwidths[:nMixedOpLayers]
            # # update previous layer
            # prevLayer = l.outputLayer()

        self.avgpool = AvgPool2d(7 if self.dataset == 'imagenet' else 4)
        # self.fc = MixedLinear(bitwidths, 64, 10)
        self.fc = Linear(512, nClasses).cuda()

        return layers
Example #11
    def __init__(self, config: ModelConfig):
        super().__init__()

        self.input_resize = None
        for i in range(config.data_config.input_features):
            if config.input_resize[i] is not None and self.input_resize is None:
                self.input_resize = ModuleList()

        if self.input_resize is not None:
            self.resulting_embeddings_size = 0
            for i in range(config.data_config.input_features):
                if config.input_resize[i] is not None:
                    self.input_resize.append(
                        Linear(in_features=config.input_embeddings_sizes[i],
                               out_features=config.input_resize[i]))
                    self.resulting_embeddings_size += config.input_resize[i]
                else:
                    self.input_resize.append(None)
                    self.resulting_embeddings_size += config.input_embeddings_sizes[
                        i]
        else:
            self.resulting_embeddings_size = sum(config.input_embeddings_sizes)

        if config.input_apply_linear:
            if config.input_linear_size is None:
                self.input_linear = Linear(
                    in_features=self.resulting_embeddings_size,
                    out_features=self.resulting_embeddings_size)
            else:
                self.input_linear = Linear(
                    in_features=self.resulting_embeddings_size,
                    out_features=config.input_linear_size)
                self.resulting_embeddings_size = config.input_linear_size
        else:
            self.input_linear = None

        if config.input_dropout_rate is not None:
            self.input_dropout = Dropout(p=config.input_dropout_rate)
        else:
            self.input_dropout = None

        config.encoder_output_size = self.resulting_embeddings_size
Example #12
    def __init__(self,
                 in_channels,
                 out_channels,
                 hiddens=[8],
                 n_heads=[8],
                 activations=['elu'],
                 dropout=0.6,
                 l2_norm=5e-4,
                 lr=0.01,
                 use_bias=True):

        super().__init__()

        self.layers = ModuleList()
        paras = []

        inc = in_channels
        pre_head = 1
        for hidden, n_head, activation in zip(hiddens, n_heads, activations):
            layer = SparseGraphAttention(inc * pre_head,
                                         hidden,
                                         activation=activation,
                                         attn_heads=n_head,
                                         reduction='concat',
                                         use_bias=use_bias)
            self.layers.append(layer)
            paras.append(dict(params=layer.parameters(), weight_decay=l2_norm))
            inc = hidden
            pre_head = n_head

        layer = SparseGraphAttention(inc * pre_head,
                                     out_channels,
                                     attn_heads=1,
                                     reduction='average',
                                     use_bias=use_bias)
        self.layers.append(layer)
        # do not use weight_decay in the final layer
        paras.append(dict(params=layer.parameters(), weight_decay=0.))

        self.optimizer = optim.Adam(paras, lr=lr)
        self.loss_fn = torch.nn.CrossEntropyLoss()
        self.dropout = Dropout(dropout)
Example #13
    def __init__(self, bitwidths, params, countBopsParams, prevLayer):
        super(MixedFilter, self).__init__()

        # assure bitwidths is a list of integers
        if isinstance(bitwidths[0], list):
            bitwidths = bitwidths[0]
        # remove duplicate values in bitwidth
        bitwidths = self.__removeDuplicateValues(bitwidths)

        # init previous layer, put it in a list, in order to ignore it as a model in this instance
        prevLayer = None
        assert ((prevLayer is None) or (isinstance(prevLayer, MixedFilter)))
        self.prevLayer = [prevLayer]

        # init operations mixture
        self.ops = ModuleList()
        # ops must have at least one copy
        self.ops.append(self.initOps(bitwidths, params))
        # add more copies if prevLayer exists
        if prevLayer:
            for _ in range(prevLayer.numOfOps() - 1):
                self.ops.append(self.initOps(bitwidths, params))

        # init ops forward counters
        self.opsForwardCounters = self.buildOpsForwardCounters()

        self.curr_alpha_idx = 0
        self.prev_alpha_idx = 0
        # init counter for number of consecutive times optimal alpha reached optimal probability limit
        self.optLimitCounter = 0

        # set forward function in order to assure that hooks will take place
        self.forwardFunc = self.setForwardFunc()
        # assign pre & post forward hooks
        self.register_forward_pre_hook(preForward)
        self.register_forward_hook(postForward)
        # set hook flag, to make sure hook happens
        # turn it on on pre-forward hook, turn it off on post-forward hook
        self.hookDevices = []

        # list of (mults, adds, calc_mac_value, batch_size) per op
        self.bops = self.countOpsBops(countBopsParams)
Example #14
    def __init__(self,
                 num_classes=10,
                 weight_bit_width=None,
                 act_bit_width=None,
                 in_bit_width=None,
                 in_ch=3,
                 device="cpu"):
        super(CNV_hardware, self).__init__()
        self.device = device

        weight_quant_type = commons.get_quant_type(weight_bit_width)
        act_quant_type = commons.get_quant_type(act_bit_width)
        in_quant_type = commons.get_quant_type(in_bit_width)
        stats_op = commons.get_stats_op(weight_quant_type)

        self.linear_features = ModuleList()

        # fully connected layers
        self.linear_features.append(
            commons.get_act_quant(in_bit_width, in_quant_type))

        for in_features, out_features in INTERMEDIATE_FC_FEATURES:
            self.linear_features.append(
                commons.get_quant_linear(
                    in_features=in_features,
                    out_features=out_features,
                    per_out_ch_scaling=INTERMEDIATE_FC_PER_OUT_CH_SCALING,
                    bit_width=weight_bit_width,
                    quant_type=weight_quant_type,
                    stats_op=stats_op))
            self.linear_features.append(BatchNorm1d(out_features))
            self.linear_features.append(
                commons.get_act_quant(act_bit_width, act_quant_type))

        # last layer
        self.fc = commons.get_quant_linear(
            in_features=LAST_FC_IN_FEATURES,
            out_features=num_classes,
            per_out_ch_scaling=LAST_FC_PER_OUT_CH_SCALING,
            bit_width=weight_bit_width,
            quant_type=weight_quant_type,
            stats_op=stats_op)
Example #15
    def __init__(self,
                 num_embeddings=12,
                 embedding_dim=64,
                 num_layers=1,
                 num_classes=5,
                 name=None,
                 BagOfWordsType=BagOfWordsType.ATOMS,
                 use_cuda=False):

        super(BagOfWordsModel, self).__init__(name=name, use_cuda=use_cuda)

        self.embedding_dim = embedding_dim
        self.embeddings = Embedding(num_embeddings, embedding_dim)
        self.softmax = Softmax(dim=-1)

        self.l_out = Linear(in_features=embedding_dim, out_features=num_classes)

        self.bow_layers = ModuleList(
            [BagOfWordsLayer(embedding_dim=embedding_dim, BagOfWordsType=BagOfWordsType) for _ in range(num_layers)]
        )
Example #16
    def __init__(self, n_feat, n_hid, n_class, dropout, alpha, n_heads):
        """Dense version of GAT."""
        super(GraphAttentionNetwork, self).__init__()

        self.attentions = ModuleList([
            GraphAttentionLayer(n_feat,
                                n_hid,
                                dropout=dropout,
                                alpha=alpha,
                                graph_convolve=False) for _ in range(n_heads)
        ])

        self.out_att = GraphAttentionLayer(n_hid * n_heads,
                                           n_class,
                                           dropout=dropout,
                                           alpha=alpha,
                                           graph_convolve=False)

        self.dropout = Dropout(dropout)
        self.elu = ELU()
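
These modules are normally combined in the classic dense-GAT forward; a sketch of that pattern (an assumption, since the source stops at the constructor, and the att(x, adj) call signature is assumed): heads are concatenated, then the single-head output layer produces class logits.

import torch
import torch.nn.functional as F

def forward(self, x, adj):
    x = self.dropout(x)
    # concatenate the outputs of all attention heads along the feature axis
    x = torch.cat([att(x, adj) for att in self.attentions], dim=1)
    x = self.dropout(x)
    x = self.elu(self.out_att(x, adj))
    return F.log_softmax(x, dim=1)
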
Example #17
    def __init__(self,
                 in_channels: int,
                 hidden_channels: int,
                 out_channels: int,
                 num_embeddings: int,
                 num_layers: int,
                 dropout: float = 0.0,
                 batch_norm: bool = True,
                 relu_first: bool = False):
        super(SIGN, self).__init__()

        self.mlps = ModuleList()
        for _ in range(num_embeddings):
            mlp = MLP(in_channels, hidden_channels, hidden_channels,
                      num_layers, dropout, batch_norm, relu_first)
            self.mlps.append(mlp)

        self.mlp = MLP(num_embeddings * hidden_channels, hidden_channels,
                       out_channels, num_layers, dropout, batch_norm,
                       relu_first)
Example #18
    def __init__(self, ids):
        """Constructor.

    Args:
      ids: A list of YCB object ids.
    """
        super(YCBGroupLayer, self).__init__()

        self._ids = ids
        self._layers = ModuleList([YCBLayer(i) for i in self._ids])
        self._num_obj = len(self._ids)

        f = []
        offset = 0
        for i in range(self._num_obj):
            if i > 0:
                offset += self._layers[i - 1].v.size(1)
            f.append(self._layers[i].f + offset)
        f = torch.cat(f)
        self.register_buffer('f', f)
Example #19
 def __init__(self,
              d_model: int,
              num_heads: int,
              num_layers: int,
              intermediate_size: int,
              layer_norm_eps: float = 1e-5,
              dropout_prob: float = 0.1,
              activation: str = "gelu",
              use_positional_encoding: bool = True):
     super().__init__()
     self.use_positional_encoding = use_positional_encoding
     self.layers = ModuleList([
         TransformerEncoderSubLayer(d_model,
                                    num_heads,
                                    intermediate_size,
                                    layer_norm_eps=layer_norm_eps,
                                    dropout_prob=dropout_prob,
                                    activation=activation)
         for _ in range(num_layers)
     ])
Example #20
 def _set_emb_layers(self) -> None:
     """Construct embedding layers.
      If the model is non-batched, we use nn.Embedding to learn the embedding
      weights. If the model is batched (self.batch_shape is non-empty), we load
      posterior samples of the embedding weights and construct a parameter list
      in which each parameter is the embedding weight of one layer. The shape of
      each weight matrix is ns x num_contexts x emb_dim.
      """
     self.emb_layers = ModuleList([
         torch.nn.Embedding(num_embeddings=x, embedding_dim=y, max_norm=1.0)
         for x, y in self.emb_dims
     ])
     # use posterior of emb weights
     if len(self.batch_shape) > 0:
         self.emb_weight_matrix_list = torch.nn.ParameterList([
             torch.nn.Parameter(
                 torch.zeros(
                     self.batch_shape + emb_layer.weight.shape,
                     device=self.device,
                 )) for emb_layer in self.emb_layers
         ])
Example #21
    def __init__(self, base_means, n_tasks):
        """
        Args:
            base_means (:obj:`list` or :obj:`gpytorch.means.Mean`): If a list, each mean is applied to the data.
                If a single mean (or a list containing a single mean), that mean is copied `t` times.
            n_tasks (int): Number of tasks. If base_means is a list, this should equal its length.
        """
        super(MultitaskMean, self).__init__()

        if isinstance(base_means, Mean):
            base_means = [base_means]

        if not isinstance(base_means, list) or (len(base_means) != 1 and len(base_means) != n_tasks):
            raise RuntimeError("base_means should be a list of means of length either 1 or n_tasks")

        if len(base_means) == 1:
            base_means = base_means + [deepcopy(base_means[0]) for i in range(n_tasks - 1)]

        self.base_means = ModuleList(base_means)
        self.n_tasks = n_tasks
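
A hedged usage sketch of the copy-versus-list behavior described in the docstring (gpytorch means are assumed importable; the specific mean classes are illustrative):

from gpytorch.means import ConstantMean, ZeroMean

shared = MultitaskMean(ConstantMean(), n_tasks=3)                           # deep-copied 3 times
mixed = MultitaskMean([ZeroMean(), ConstantMean(), ZeroMean()], n_tasks=3)  # used as given
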
Example #22
        def initBlocks(self, params, countFlopsFlag):
            widthRatioList, nClasses, input_size, partition = params

            blocksPlanes = self.initBlocksPlanes()

            # init parameters
            kernel_size = 7
            stride = 2

            # create list of blocks from blocksPlanes
            blocks = ModuleList()
            # output size is divided by 2 due to maxpool after 1st conv layer
            prevLayer = Input(3, int(input_size / 2))

            for i, (blockType, out_planes) in enumerate(blocksPlanes):
                # increase number of out_planes
                out_planes *= 4
                # copy width ratio list
                layerWidthRatioList = widthRatioList.copy()
                # add partition ratio if exists
                if partition:
                    layerWidthRatioList += [partition[i]]
                # build layer
                l = blockType(layerWidthRatioList, out_planes, kernel_size,
                              stride, prevLayer, countFlopsFlag)
                # update kernel size
                kernel_size = 3
                # update stride
                stride = 1
                # add layer to blocks list
                blocks.append(l)
                # update previous layer
                prevLayer = l.outputLayer()

            self.maxpool = MaxPool2d(kernel_size=kernel_size,
                                     stride=2,
                                     padding=1)
            self.avgpool = AvgPool2d(7)
            self.fc = Linear(1024, nClasses).cuda()

            return blocks
Example #23
    def __init__(
        self,
        in_channels,
        hidden_channels,
        out_channels,
        num_layers,
        dropout,
        num_nodes_dict,
        x_types,
        num_edge_types,
    ):
        super(RGCN, self).__init__()

        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        self.out_channels = out_channels
        self.num_layers = num_layers
        self.dropout = dropout

        node_types = list(num_nodes_dict.keys())
        num_node_types = len(node_types)

        self.num_node_types = num_node_types
        self.num_edge_types = num_edge_types

        # Create embeddings for all node types that do not come with features.
        self.emb_dict = ParameterDict({
            f"{key}": Parameter(torch.Tensor(num_nodes_dict[key], in_channels))
            for key in set(node_types).difference(set(x_types))
        })

        I, H, O = in_channels, hidden_channels, out_channels  # noqa

        # Create `num_layers` many message passing layers.
        self.convs = ModuleList()
        self.convs.append(RGCNConv(I, H, num_node_types, num_edge_types))
        for _ in range(num_layers - 2):
            self.convs.append(RGCNConv(H, H, num_node_types, num_edge_types))
        self.convs.append(RGCNConv(H, O, self.num_node_types, num_edge_types))

        self.reset_parameters()
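
One detail worth a sketch: featureless node types draw their inputs from self.emb_dict. A minimal illustration of how those embeddings might be merged with real features before the convolutions (an assumption; the source omits the forward pass and the hypothetical helper name group_input is not from the source):

def group_input(self, x_dict):
    # node types that come with features pass through unchanged;
    # the remaining types use their learned embedding matrices
    out = dict(x_dict)
    for key, emb in self.emb_dict.items():
        out[key] = emb
    return out
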
Example #24
    def __init__(self,
                 *,
                 img_shape,
                 hidden_channels,
                 out_channels,
                 aux_channels,
                 blocks,
                 attn_heads,
                 pdrop,
                 output_init_scale,
                 attn_version,
                 nonlinearity=concat_elu,
                 pos_emb_init=0.01):  # TODO this number
        super().__init__()

        in_channels, height, width = img_shape

        self.pos_emb = Parameter(torch.Tensor(hidden_channels, height, width))
        torch.nn.init.normal_(self.pos_emb, mean=0., std=pos_emb_init)

        self.proj_in = Conv2d(in_channels=in_channels,
                              out_channels=hidden_channels,
                              kernel_size=3,
                              padding=1)
        self.blocks = ModuleList([
            ConvAttnBlock_Imagenet64(channels=hidden_channels,
                                     aux_channels=aux_channels,
                                     attn_heads=attn_heads,
                                     pdrop=pdrop,
                                     attn_version=attn_version)
            for _ in range(blocks)
        ])

        # additional nonlinearity added in compared to CIFAR
        self.nonlinearity = nonlinearity

        self.proj_out = Conv2d(in_channels=hidden_channels * 2,
                               out_channels=out_channels,
                               kernel_size=3,
                               padding=1,
                               init_scale=output_init_scale)
Example #25
    def __init__(self,
                 num_classes,
                 weight_bit_width,
                 act_bit_width,
                 in_bit_width,
                 in_channels,
                 out_features,
                 in_features=(28, 28)):
        super(FC, self).__init__()

        self.features = ModuleList()
        self.features.append(
            QuantIdentity(act_quant=CommonActQuant, bit_width=in_bit_width))
        self.features.append(Dropout(p=DROPOUT))
        in_features = reduce(mul, in_features)
        for out_features in out_features:
            self.features.append(
                QuantLinear(in_features=in_features,
                            out_features=out_features,
                            bias=False,
                            weight_bit_width=weight_bit_width,
                            weight_quant=CommonWeightQuant))
            in_features = out_features
            self.features.append(BatchNorm1d(num_features=in_features))
            self.features.append(
                QuantIdentity(act_quant=CommonActQuant,
                              bit_width=act_bit_width))
            self.features.append(Dropout(p=DROPOUT))
        self.features.append(
            QuantLinear(in_features=in_features,
                        out_features=num_classes,
                        bias=False,
                        weight_bit_width=weight_bit_width,
                        weight_quant=CommonWeightQuant))
        self.features.append(TensorNorm())

        self.name = 'FC'

        for m in self.modules():
            if isinstance(m, QuantLinear):
                torch.nn.init.uniform_(m.weight.data, -1, 1)
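
Since every block, including the final classifier and TensorNorm, is appended to self.features, the forward pass reduces to a flatten followed by a single loop; a hedged sketch:

def forward(self, x):
    x = x.view(x.size(0), -1)   # flatten e.g. 1x28x28 inputs to vectors
    for mod in self.features:
        x = mod(x)
    return x
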
Example #26
    def __init__(self,
                 in_channels,
                 out_channels,
                 hids=[16],
                 acts=['relu'],
                 tperc=0.45,
                 dropout=0.5,
                 weight_decay=5e-4,
                 lr=0.01,
                 use_bias=False):

        super().__init__()

        layers = ModuleList()
        paras = []

        # use ModuleList to create layers with different size
        inc = in_channels
        for hid, act in zip(hids, acts):
            layer = TrimmedConvolution(inc,
                                       hid,
                                       activation=act,
                                       use_bias=use_bias,
                                       tperc=tperc)
            layers.append(layer)
            paras.append(
                dict(params=layer.parameters(), weight_decay=weight_decay))
            inc = hid

        layer = TrimmedConvolution(inc,
                                   out_channels,
                                   use_bias=use_bias,
                                   tperc=tperc)
        layers.append(layer)
        # do not use weight_decay in the final layer
        paras.append(dict(params=layer.parameters(), weight_decay=0.))
        self.compile(loss=torch.nn.CrossEntropyLoss(),
                     optimizer=optim.Adam(paras, lr=lr),
                     metrics=[Accuracy()])
        self.dropout = Dropout(dropout)
        self.layers = layers
Example #27
    def __init__(self, models: List[MultilingualTransformerModel],
                 task: MultilingualTranslationTask, cfg: DictConfig,
                 sp_models: Dict[str, SentencePieceProcessor]):
        super().__init__()

        self.sp_models = sp_models
        self.models = ModuleList(models)
        self.task = task
        self.cfg = cfg
        self.dicts: Dict[str, Dictionary] = task.dicts
        self.langs = task.langs

        for model in self.models:
            model.prepare_for_inference_(self.cfg)

        self.max_positions = utils.resolve_max_positions(
            self.task.max_positions(),
            *[model.max_positions() for model in self.models])

        self.register_buffer("_float_tensor",
                             torch.tensor([0], dtype=torch.float))
Example #28
 def __init__(self, writer, num_hidden_layers=1):
     super(CosineNet, self).__init__()
     input_features = 1
     hidden_output_features = 10
     final_output_features = 1
     self.writer = writer
     layers = ModuleList()
     for i in range(num_hidden_layers):
         if i == 0:
             layers.append(
                 torch.nn.Linear(input_features, hidden_output_features))
         else:
             layers.append(
                 torch.nn.Linear(hidden_output_features,
                                 hidden_output_features))
         layers.append(torch.nn.ReLU())
     final_layer = torch.nn.Linear(hidden_output_features,
                                   final_output_features)
     layers.append(final_layer)
     self.model = torch.nn.Sequential(*layers)
     self.loss_func = torch.nn.MSELoss()
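
A short, hedged fitting sketch for the model above (writer is passed as None here purely for illustration; the hyperparameters are hypothetical):

import torch

net = CosineNet(writer=None, num_hidden_layers=2)
opt = torch.optim.Adam(net.model.parameters(), lr=1e-3)
x = torch.linspace(-3.1416, 3.1416, 256).unsqueeze(1)
y = torch.cos(x)
for _ in range(200):
    opt.zero_grad()
    loss = net.loss_func(net.model(x), y)
    loss.backward()
    opt.step()
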
Example #29
    def __init__(self, vocab_size: int, num_encoder_layer: int,
                 hidden_size: int, num_head: int, feedward: int,
                 dropout: float, device: str):
        """

        :param vocab_size: num of word in source language
        :param num_encoder_layer:  number of encoder layer
        :param hidden_size: the hidden size/ embedding size for single word
        :param num_head: the number of multi-head
        :param feedward: hidden dimension for feedback
        """
        super().__init__()
        self.Encoder_layers = ModuleList()
        for i in range(num_encoder_layer):
            self.Encoder_layers.append(
                EncoderLayer(hidden_size, num_head, feedward, dropout))
            #self.Encoder_layers.append(TransformerEncoderLayer(d_model=hidden_size,nhead=8,dim_feedforward=2048))
        self.Embedding = Embedding(vocab_size, hidden_size, padding_idx=0)
        self.Positional_Encoding = Positional_Encoding(hidden_size, 512,
                                                       device)
        self.d_model = hidden_size
Example #30
def init_stacked_analog_lstm(
        num_layers: int,
        layer: Type,
        first_layer_args: Any,
        other_layer_args: Any
) -> ModuleList:
    """Construct a list of LSTMLayers over which to iterate.

    Args:
        num_layers: number of serially connected LSTM layers
        layer: LSTM layer type (e.g. AnalogLSTMLayer)
        first_layer_args: LSTMCell type, input_size, hidden_size, rpu_config, etc.
        other_layer_args: LSTMCell type, hidden_size, hidden_size, rpu_config, etc.

    Returns:
        torch.nn.ModuleList, which is similar to a regular Python list,
        but where torch.nn.Module methods can be applied
    """
    layers = [layer(*first_layer_args)] \
        + [layer(*other_layer_args) for _ in range(num_layers - 1)]
    return ModuleList(layers)
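
A hedged sketch of how the returned ModuleList is typically consumed, with per-layer state threading simplified relative to aihwkit's actual AnalogLSTM:

def stacked_forward(layers, x, states):
    # run the input through each analog LSTM layer in turn,
    # collecting the updated per-layer states
    out, new_states = x, []
    for layer, state in zip(layers, states):
        out, state = layer(out, state)
        new_states.append(state)
    return out, new_states
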