def __init__(self, **kwargs):
    super(SimpleStochasticPolicy, self).__init__()
    hidden_size = kwargs['linear_layers_size']
    # actor
    self.bn = BatchNorm1d(kwargs['input_dim'])
    self.linears = ModuleList(
        [Linear(kwargs['input_dim'], hidden_size[0])])
    self.linears.extend([
        Linear(hidden_size[i - 1], hidden_size[i])
        for i in range(1, len(hidden_size))
    ])
    self.mu = Linear(hidden_size[-1], kwargs['action_dim'])
    self.log_var = Linear(hidden_size[-1], kwargs['action_dim'])
    # self.log_var = torch.nn.Parameter(torch.zeros(kwargs['action_dim']))
    self.relu = ReLU()
    self.tanh = Tanh()
    self.apply(init_weights)  # xavier uniform init

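# A minimal forward sketch for the policy above (an assumption, not the
# author's code): batch-norm the input, pass it through the hidden stack
# with ReLU, and return a tanh-squashed mean plus a log-variance for a
# diagonal-Gaussian action distribution.
def forward(self, state):
    x = self.bn(state)
    for linear in self.linears:
        x = self.relu(linear(x))
    mu = self.tanh(self.mu(x))
    log_var = self.log_var(x)
    return mu, log_var
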
def __init__(self, device, input_size=300, layers=(512, 256), output_size=3,
             p_dropout=0.5, activation='relu', continuous=True):
    super(simpleMLP, self).__init__()
    self._device = device
    self._linmaps = ModuleList([])
    last_size = input_size
    for j in layers:
        self._linmaps.append(Linear(last_size, j))
        last_size = j
    self._linmaps.append(Linear(last_size, output_size))
    self._dropout = Dropout(p=p_dropout)
    self._activation = activation
    self._continuous = continuous

def __init__(
    self,
    in_feats: int,
    out_feats: int,
    n_steps: int,
    n_etypes: int,
    bias: bool = True,
) -> None:
    """Construct a GGNN layer."""
    super().__init__()
    self.in_feats = in_feats
    self.out_feats = out_feats
    self.n_steps = n_steps
    self.n_etypes = n_etypes
    self._linears = ModuleList(
        [Linear(out_feats, out_feats) for _n in range(n_etypes)])
    self._gru = GRUCell(input_size=out_feats, hidden_size=out_feats, bias=bias)

def __init__(self, hidden_channels=128, num_filters=128, num_interactions=6,
             num_gaussians=50, cutoff=10.0, readout='add', dipole=False,
             mean=None, std=None, atomref=None):
    super(SchNet, self).__init__()

    assert readout in ['add', 'sum', 'mean']

    self.hidden_channels = hidden_channels
    self.num_filters = num_filters
    self.num_interactions = num_interactions
    self.num_gaussians = num_gaussians
    self.cutoff = cutoff
    self.readout = readout
    self.dipole = dipole
    self.readout = 'add' if self.dipole else self.readout
    self.mean = mean
    self.std = std
    self.scale = None

    atomic_mass = torch.from_numpy(ase.data.atomic_masses)
    self.register_buffer('atomic_mass', atomic_mass)

    self.embedding = Embedding(100, hidden_channels)
    self.distance_expansion = GaussianSmearing(0.0, cutoff, num_gaussians)

    self.interactions = ModuleList()
    for _ in range(num_interactions):
        block = InteractionBlock(hidden_channels, num_gaussians,
                                 num_filters, cutoff)
        self.interactions.append(block)

    self.lin1 = Linear(hidden_channels, hidden_channels // 2)
    self.act = ShiftedSoftplus()
    self.lin2 = Linear(hidden_channels // 2, 1)

    self.register_buffer('initial_atomref', atomref)
    self.atomref = None
    if atomref is not None:
        self.atomref = Embedding(100, 1)
        self.atomref.weight.data.copy_(atomref)

    self.reset_parameters()

def __init__(self, embedding_size=512):
    super().__init__()
    self.main_module = ModuleList()
    # conv1 -> 64 x 56 x 56
    self.main_module.append(
        Conv_block(3, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
    )
    # conv2 -> 64 x 28 x 28
    self.main_module.append(Sequential(
        Conv_block(64, 64, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64),
        Depth_Wise(64, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128)
    ))
    # conv3 -> 128 x 14 x 14
    self.main_module.append(Sequential(
        Residual(64, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
        Depth_Wise(64, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256)
    ))
    # conv4 -> 128 x 7 x 7
    self.main_module.append(Sequential(
        Residual(128, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)),
        Depth_Wise(128, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512)
    ))
    # conv5 -> 128 x 7 x 7
    self.main_module.append(Sequential(
        Residual(128, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
    ))
    # conv6 -> 512 x 1 x 1
    self.main_module.append(Sequential(
        Conv_block(128, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0)),
        Linear_block(512, 512, groups=512, kernel=(7, 7), stride=(1, 1), padding=(0, 0)),
        Flatten()
    ))
    # output layer -> 512
    self.main_module.append(Sequential(
        Linear(512, embedding_size, bias=False),
        BatchNorm1d(embedding_size),
    ))

def __init__(self, vocab_size: int, num_layer: int, num_head: int,
             hiddensize: int, feed_back: int, dropout: float, device: str):
    """
    :param vocab_size: size of the target-language vocabulary
    :param num_layer: number of decoder layers
    :param num_head: number of attention heads
    :param hiddensize: embedding dimension
    :param feed_back: hidden dimension of the feed-forward sublayer
    """
    super(Decoder, self).__init__()
    self.Positional = Positional_Encoding(hiddensize, 512, device)
    self.Embedding = Embedding(vocab_size, hiddensize, padding_idx=0)
    self.Decoder_Layer = ModuleList()
    for i in range(num_layer):
        self.Decoder_Layer.append(
            DecoderLayer(num_head, hiddensize, feed_back, dropout))
    self.feedback = FeedForward(hiddensize, feed_back, dropout=dropout)
    self.d_model = hiddensize
    self.Linear = Linear(hiddensize, vocab_size)

def __init__(self, kernel_size, conv_depth, layer_structure=[1, 2],
             initial_depth=None, activation=SELU):
    '''Convolution stack with residual structure.

    Inputs
    ------
    kernel_size : as in Conv2d
    conv_depth : output depth
    layer_structure : list of ints. The first int is the number of
        convolutions to apply initially. After that, each int is the number
        of convolutions to apply before adding the residual from the
        previous state. The default [1, 2] does one convolution to produce
        "x", then does two convolutions and adds x.
    initial_depth : depth of the first input (defaults to conv_depth)
    activation : class for activation
    '''
    super(ResidualConvStack, self).__init__()
    if initial_depth is None:
        initial_depth = conv_depth
    self.convs = ModuleList([])
    self.convs.append(
        Conv2d(initial_depth, conv_depth, kernel_size,
               padding=kernel_size // 2))
    for _ in range(sum(layer_structure) - 1):
        self.convs.append(
            Conv2d(conv_depth, conv_depth, kernel_size,
                   padding=kernel_size // 2))
    self.layer_structure = layer_structure
    self.activation = activation()

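# A minimal forward sketch for the stack above (an assumption, not the
# author's code): consume self.convs in groups given by layer_structure,
# adding the running residual after each group beyond the first.
def forward(self, x):
    convs = iter(self.convs)
    for _ in range(self.layer_structure[0]):
        x = self.activation(next(convs)(x))
    for group in self.layer_structure[1:]:
        residual = x
        for _ in range(group):
            x = self.activation(next(convs)(x))
        x = x + residual
    return x
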
def initmodel(cencoder, tencoder, embdim):
    cencoder = copy(cencoder)
    tencoder = copy(tencoder)
    tencoder[BD] = len(tencoder)
    cencoder[BD] = len(cencoder)
    cembedding = Embedding(len(cencoder), embdim)
    tembedding = Embedding(len(tencoder), embdim)
    enc = LSTM(input_size=embdim, hidden_size=LSTMDIM, num_layers=1,
               bidirectional=True).type(DTYPE)
    ench0 = randn(2, 1, LSTMDIM).type(DTYPE)
    encc0 = randn(2, 1, LSTMDIM).type(DTYPE)
    dec = LSTM(input_size=2 * LSTMDIM + embdim, hidden_size=LSTMDIM,
               num_layers=1).type(DTYPE)
    dech0 = randn(2, 1, 2 * LSTMDIM + embdim).type(DTYPE)
    decc0 = randn(2, 1, 2 * LSTMDIM + embdim).type(DTYPE)
    pred = Linear(LSTMDIM, len(cencoder)).type(DTYPE)
    sm = LogSoftmax().type(DTYPE)
    model = ModuleList([cembedding, tembedding, enc, dec, pred, sm])
    optimizer = Adam(model.parameters(), lr=LEARNINGRATE, betas=BETAS)
    return {
        'model': model,
        'optimizer': optimizer,
        'cencoder': cencoder,
        'tencoder': tencoder,
        'cembedding': cembedding,
        'tembedding': tembedding,
        'enc': enc,
        'ench0': ench0,
        'encc0': encc0,
        'dec': dec,
        'dech0': dech0,
        'decc0': decc0,
        'pred': pred,
        'sm': sm,
        'embdim': embdim
    }

def __init__(self, in_features, n_classes, cutoffs, div_value=4.,
             head_bias=False, get_full_prob=False):
    super(AdaptiveLogSoftmaxWithLoss, self).__init__()

    cutoffs = list(cutoffs)

    if (cutoffs != sorted(cutoffs)) \
            or (min(cutoffs) <= 0) \
            or (max(cutoffs) >= (n_classes - 1)) \
            or (len(set(cutoffs)) != len(cutoffs)) \
            or any([int(c) != c for c in cutoffs]):
        raise ValueError("cutoffs should be a sequence of unique, positive "
                         "integers sorted in an increasing order, where "
                         "each value is between 1 and n_classes-1")

    self.in_features = in_features
    self.n_classes = n_classes
    self.cutoffs = cutoffs + [n_classes]
    self.div_value = div_value
    self.head_bias = head_bias
    self.get_full_prob = get_full_prob

    self.shortlist_size = self.cutoffs[0]
    self.n_clusters = len(self.cutoffs) - 1
    self.head_size = self.shortlist_size + self.n_clusters

    self.head = Linear(self.in_features, self.head_size, bias=self.head_bias)
    self.tail = ModuleList()
    self.log_softmax = torch.nn.LogSoftmax(dim=1)

    for i in range(self.n_clusters):
        hsz = int(self.in_features // (self.div_value ** (i + 1)))
        osz = self.cutoffs[i + 1] - self.cutoffs[i]

        projection = Sequential(
            Linear(self.in_features, hsz, bias=False),
            Linear(hsz, osz, bias=False)
        )
        self.tail.append(projection)

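# Example construction of the adaptive softmax above with a valid cutoff
# sequence (illustrative values only): with n_classes=10000 and
# cutoffs=[100, 1000], the head covers the 100 most frequent classes plus
# two cluster logits, and the two tail clusters cover classes 100-999 and
# 1000-9999 with progressively smaller projections.
adaptive_softmax = AdaptiveLogSoftmaxWithLoss(
    in_features=512, n_classes=10000, cutoffs=[100, 1000], div_value=4.)
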
def initLayers(self, params):
    bitwidths, kernel_sizes, nClasses = params
    bitwidths = bitwidths.copy()

    layersPlanes = self.initLayersPlanes()

    # init previous layer
    prevLayer = None

    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) \
        if self.dataset == 'imagenet' else lambda x: x

    # create list of layers from layersPlanes
    # supports bitwidth as list of ints, i.e. same bitwidths to all layers
    # supports bitwidth as list of lists, i.e. specific bitwidths to each layer
    layers = ModuleList()
    for i, (layerType, in_planes, out_planes, input_size) in enumerate(layersPlanes):
        # build layer
        kernel_sizes_tmp = kernel_sizes
        if layerType == self.createMixedLayer and self.dataset == 'imagenet':
            kernel_sizes_tmp = [7]
            l = layerType(bitwidths, in_planes, out_planes, kernel_sizes_tmp,
                          2, input_size, prevLayer)
        else:
            l = layerType(bitwidths, in_planes, out_planes, kernel_sizes_tmp,
                          1, input_size, prevLayer)
        # add layer to layers list
        layers.append(l)
        # remove layer specific bitwidths, in case of different bitwidths to layers
        # if isinstance(bitwidths[0], list):
        #     nMixedOpLayers = 1 if isinstance(l, MixedFilter) \
        #         else sum(1 for _, m in l._modules.items() if isinstance(m, MixedFilter))
        #     del bitwidths[:nMixedOpLayers]
        # # update previous layer
        # prevLayer = l.outputLayer()

    self.avgpool = AvgPool2d(7 if self.dataset == 'imagenet' else 4)
    # self.fc = MixedLinear(bitwidths, 64, 10)
    self.fc = Linear(512, nClasses).cuda()

    return layers

def __init__(self, config: ModelConfig):
    super().__init__()
    self.input_resize = None
    for i in range(config.data_config.input_features):
        if config.input_resize[i] is not None and self.input_resize is None:
            self.input_resize = ModuleList()
    if self.input_resize is not None:
        self.resulting_embeddings_size = 0
        for i in range(config.data_config.input_features):
            if config.input_resize[i] is not None:
                self.input_resize.append(
                    Linear(in_features=config.input_embeddings_sizes[i],
                           out_features=config.input_resize[i]))
                self.resulting_embeddings_size += config.input_resize[i]
            else:
                self.input_resize.append(None)
                self.resulting_embeddings_size += config.input_embeddings_sizes[i]
    else:
        self.resulting_embeddings_size = sum(config.input_embeddings_sizes)
    if config.input_apply_linear:
        if config.input_linear_size is None:
            self.input_linear = Linear(
                in_features=self.resulting_embeddings_size,
                out_features=self.resulting_embeddings_size)
        else:
            self.input_linear = Linear(
                in_features=self.resulting_embeddings_size,
                out_features=config.input_linear_size)
            self.resulting_embeddings_size = config.input_linear_size
    else:
        self.input_linear = None
    if config.input_dropout_rate is not None:
        self.input_dropout = Dropout(p=config.input_dropout_rate)
    else:
        self.input_dropout = None
    config.encoder_output_size = self.resulting_embeddings_size

def __init__(self, in_channels, out_channels, hiddens=[8], n_heads=[8],
             activations=['elu'], dropout=0.6, l2_norm=5e-4, lr=0.01,
             use_bias=True):
    super().__init__()

    self.layers = ModuleList()
    paras = []

    inc = in_channels
    pre_head = 1
    for hidden, n_head, activation in zip(hiddens, n_heads, activations):
        layer = SparseGraphAttention(inc * pre_head,
                                     hidden,
                                     activation=activation,
                                     attn_heads=n_head,
                                     reduction='concat',
                                     use_bias=use_bias)
        self.layers.append(layer)
        paras.append(dict(params=layer.parameters(), weight_decay=l2_norm))
        inc = hidden
        pre_head = n_head

    layer = SparseGraphAttention(inc * pre_head,
                                 out_channels,
                                 attn_heads=1,
                                 reduction='average',
                                 use_bias=use_bias)
    self.layers.append(layer)
    # do not use weight_decay in the final layer
    paras.append(dict(params=layer.parameters(), weight_decay=0.))

    self.optimizer = optim.Adam(paras, lr=lr)
    self.loss_fn = torch.nn.CrossEntropyLoss()
    self.dropout = Dropout(dropout)

def __init__(self, bitwidths, params, countBopsParams, prevLayer):
    super(MixedFilter, self).__init__()

    # assure bitwidths is a list of integers
    if isinstance(bitwidths[0], list):
        bitwidths = bitwidths[0]
    # remove duplicate values in bitwidth
    bitwidths = self.__removeDuplicateValues(bitwidths)

    # init previous layer, put it in a list, in order to ignore it as a model in this instance
    prevLayer = None
    assert ((prevLayer is None) or (isinstance(prevLayer, MixedFilter)))
    self.prevLayer = [prevLayer]

    # init operations mixture
    self.ops = ModuleList()
    # ops must have at least one copy
    self.ops.append(self.initOps(bitwidths, params))
    # add more copies if prevLayer exists
    if prevLayer:
        for _ in range(prevLayer.numOfOps() - 1):
            self.ops.append(self.initOps(bitwidths, params))
    # init ops forward counters
    self.opsForwardCounters = self.buildOpsForwardCounters()

    self.curr_alpha_idx = 0
    self.prev_alpha_idx = 0
    # init counter for number of consecutive times optimal alpha reached optimal probability limit
    self.optLimitCounter = 0

    # set forward function in order to assure that hooks will take place
    self.forwardFunc = self.setForwardFunc()
    # assign pre & post forward hooks
    self.register_forward_pre_hook(preForward)
    self.register_forward_hook(postForward)
    # set hook flag, to make sure hook happens
    # turn it on on pre-forward hook, turn it off on post-forward hook
    self.hookDevices = []

    # list of (mults, adds, calc_mac_value, batch_size) per op
    self.bops = self.countOpsBops(countBopsParams)

def __init__(self, num_classes=10, weight_bit_width=None, act_bit_width=None,
             in_bit_width=None, in_ch=3, device="cpu"):
    super(CNV_hardware, self).__init__()

    self.device = device

    weight_quant_type = commons.get_quant_type(weight_bit_width)
    act_quant_type = commons.get_quant_type(act_bit_width)
    in_quant_type = commons.get_quant_type(in_bit_width)
    stats_op = commons.get_stats_op(weight_quant_type)

    self.linear_features = ModuleList()

    # fully connected layers
    self.linear_features.append(
        commons.get_act_quant(in_bit_width, in_quant_type))
    for in_features, out_features in INTERMEDIATE_FC_FEATURES:
        self.linear_features.append(
            commons.get_quant_linear(
                in_features=in_features,
                out_features=out_features,
                per_out_ch_scaling=INTERMEDIATE_FC_PER_OUT_CH_SCALING,
                bit_width=weight_bit_width,
                quant_type=weight_quant_type,
                stats_op=stats_op))
        self.linear_features.append(BatchNorm1d(out_features))
        self.linear_features.append(
            commons.get_act_quant(act_bit_width, act_quant_type))

    # last layer
    self.fc = commons.get_quant_linear(
        in_features=LAST_FC_IN_FEATURES,
        out_features=num_classes,
        per_out_ch_scaling=LAST_FC_PER_OUT_CH_SCALING,
        bit_width=weight_bit_width,
        quant_type=weight_quant_type,
        stats_op=stats_op)

def __init__(self, num_embeddings=12, embedding_dim=64, num_layers=1,
             num_classes=5, name=None, BagOfWordsType=BagOfWordsType.ATOMS,
             use_cuda=False):
    super(BagOfWordsModel, self).__init__(name=name, use_cuda=use_cuda)
    self.embedding_dim = embedding_dim
    self.embeddings = Embedding(num_embeddings, embedding_dim)
    self.softmax = Softmax(dim=-1)
    self.l_out = Linear(in_features=embedding_dim, out_features=num_classes)
    self.bow_layers = ModuleList(
        [BagOfWordsLayer(embedding_dim=embedding_dim,
                         BagOfWordsType=BagOfWordsType)
         for _ in range(num_layers)]
    )

def __init__(self, n_feat, n_hid, n_class, dropout, alpha, n_heads):
    """Dense version of GAT."""
    super(GraphAttentionNetwork, self).__init__()
    self.attentions = ModuleList([
        GraphAttentionLayer(n_feat, n_hid, dropout=dropout, alpha=alpha,
                            graph_convolve=False)
        for _ in range(n_heads)
    ])
    self.out_att = GraphAttentionLayer(n_hid * n_heads, n_class,
                                       dropout=dropout, alpha=alpha,
                                       graph_convolve=False)
    self.dropout = Dropout(dropout)
    self.elu = ELU()

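# A minimal forward sketch for the GAT above (an assumption, not the
# author's code): run all attention heads in parallel, concatenate their
# outputs (matching the n_hid * n_heads input of out_att), then apply the
# single output attention layer.
def forward(self, x, adj):
    x = self.dropout(x)
    x = torch.cat([att(x, adj) for att in self.attentions], dim=1)
    x = self.dropout(x)
    return self.elu(self.out_att(x, adj))
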
def __init__(self, in_channels: int, hidden_channels: int, out_channels: int,
             num_embeddings: int, num_layers: int, dropout: float = 0.0,
             batch_norm: bool = True, relu_first: bool = False):
    super(SIGN, self).__init__()

    self.mlps = ModuleList()
    for _ in range(num_embeddings):
        mlp = MLP(in_channels, hidden_channels, hidden_channels, num_layers,
                  dropout, batch_norm, relu_first)
        self.mlps.append(mlp)

    self.mlp = MLP(num_embeddings * hidden_channels, hidden_channels,
                   out_channels, num_layers, dropout, batch_norm, relu_first)

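# A minimal forward sketch for SIGN above (an assumption, not the author's
# code): xs is a list of num_embeddings pre-propagated feature matrices;
# each gets its own MLP, the results are concatenated, and the final MLP
# (sized num_embeddings * hidden_channels) produces the output.
def forward(self, xs):
    hs = [mlp(x) for x, mlp in zip(xs, self.mlps)]
    return self.mlp(torch.cat(hs, dim=-1))
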
def __init__(self, ids):
    """Constructor.

    Args:
        ids: A list of YCB object ids.
    """
    super(YCBGroupLayer, self).__init__()

    self._ids = ids
    self._layers = ModuleList([YCBLayer(i) for i in self._ids])
    self._num_obj = len(self._ids)

    f = []
    offset = 0
    for i in range(self._num_obj):
        if i > 0:
            offset += self._layers[i - 1].v.size(1)
        f.append(self._layers[i].f + offset)
    f = torch.cat(f)
    self.register_buffer('f', f)

def __init__(self, d_model: int, num_heads: int, num_layers: int,
             intermediate_size: int, layer_norm_eps: float = 1e-5,
             dropout_prob: float = 0.1, activation: str = "gelu",
             use_positional_encoding: bool = True):
    super().__init__()
    self.use_positional_encoding = use_positional_encoding
    self.layers = ModuleList([
        TransformerEncoderSubLayer(d_model, num_heads, intermediate_size,
                                   layer_norm_eps=layer_norm_eps,
                                   dropout_prob=dropout_prob,
                                   activation=activation)
        for _ in range(num_layers)
    ])

def _set_emb_layers(self) -> None:
    """Construct embedding layers.

    If the model is non-batch, we use nn.Embedding to learn the embedding
    weights. If the model is batched (self.batch_shape is non-empty), we
    load posterior samples of the embedding weights and construct a
    parameter list in which each parameter is the embedding weight of one
    layer. The shape of each weight matrix is ns x num_contexts x emb_dim.
    """
    self.emb_layers = ModuleList([
        torch.nn.Embedding(num_embeddings=x, embedding_dim=y, max_norm=1.0)
        for x, y in self.emb_dims
    ])

    # use posterior of emb weights
    if len(self.batch_shape) > 0:
        self.emb_weight_matrix_list = torch.nn.ParameterList([
            torch.nn.Parameter(
                torch.zeros(
                    self.batch_shape + emb_layer.weight.shape,
                    device=self.device,
                ))
            for emb_layer in self.emb_layers
        ])

def __init__(self, base_means, n_tasks):
    """
    Args:
        base_means (:obj:`list` or :obj:`gpytorch.means.Mean`): If a list,
            each mean is applied to the data. If a single mean (or a list
            containing a single mean), that mean is copied `n_tasks` times.
        n_tasks (int): Number of tasks. If base_means is a list, this
            should equal its length.
    """
    super(MultitaskMean, self).__init__()

    if isinstance(base_means, Mean):
        base_means = [base_means]

    if not isinstance(base_means, list) or (len(base_means) != 1 and len(base_means) != n_tasks):
        raise RuntimeError("base_means should be a list of means of length either 1 or n_tasks")

    if len(base_means) == 1:
        base_means = base_means + [deepcopy(base_means[0]) for i in range(n_tasks - 1)]

    self.base_means = ModuleList(base_means)
    self.n_tasks = n_tasks

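# Example usage (illustrative, assuming gpytorch's ConstantMean): a single
# base mean is deep-copied for each of the three tasks, so each task gets
# its own independently trainable mean module.
from gpytorch.means import ConstantMean

multitask_mean = MultitaskMean(ConstantMean(), n_tasks=3)
assert len(multitask_mean.base_means) == 3
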
def initBlocks(self, params, countFlopsFlag):
    widthRatioList, nClasses, input_size, partition = params

    blocksPlanes = self.initBlocksPlanes()

    # init parameters
    kernel_size = 7
    stride = 2

    # create list of blocks from blocksPlanes
    blocks = ModuleList()
    # output size is divided by 2 due to maxpool after 1st conv layer
    prevLayer = Input(3, int(input_size / 2))

    for i, (blockType, out_planes) in enumerate(blocksPlanes):
        # increase number of out_planes
        out_planes *= 4
        # copy width ratio list
        layerWidthRatioList = widthRatioList.copy()
        # add partition ratio if exists
        if partition:
            layerWidthRatioList += [partition[i]]
        # build layer
        l = blockType(layerWidthRatioList, out_planes, kernel_size, stride,
                      prevLayer, countFlopsFlag)
        # update kernel size
        kernel_size = 3
        # update stride
        stride = 1
        # add layer to blocks list
        blocks.append(l)
        # update previous layer
        prevLayer = l.outputLayer()

    self.maxpool = MaxPool2d(kernel_size=kernel_size, stride=2, padding=1)
    self.avgpool = AvgPool2d(7)
    self.fc = Linear(1024, nClasses).cuda()

    return blocks

def __init__(
    self,
    in_channels,
    hidden_channels,
    out_channels,
    num_layers,
    dropout,
    num_nodes_dict,
    x_types,
    num_edge_types,
):
    super(RGCN, self).__init__()

    self.in_channels = in_channels
    self.hidden_channels = hidden_channels
    self.out_channels = out_channels
    self.num_layers = num_layers
    self.dropout = dropout

    node_types = list(num_nodes_dict.keys())
    num_node_types = len(node_types)

    self.num_node_types = num_node_types
    self.num_edge_types = num_edge_types

    # Create embeddings for all node types that do not come with features.
    self.emb_dict = ParameterDict({
        f"{key}": Parameter(torch.Tensor(num_nodes_dict[key], in_channels))
        for key in set(node_types).difference(set(x_types))
    })

    I, H, O = in_channels, hidden_channels, out_channels  # noqa

    # Create `num_layers` many message passing layers.
    self.convs = ModuleList()
    self.convs.append(RGCNConv(I, H, num_node_types, num_edge_types))
    for _ in range(num_layers - 2):
        self.convs.append(RGCNConv(H, H, num_node_types, num_edge_types))
    self.convs.append(RGCNConv(H, O, self.num_node_types, num_edge_types))

    self.reset_parameters()

def __init__(self, *, img_shape, hidden_channels, out_channels, aux_channels,
             blocks, attn_heads, pdrop, output_init_scale, attn_version,
             nonlinearity=concat_elu, pos_emb_init=0.01):  # TODO this number
    super().__init__()
    in_channels, height, width = img_shape

    self.pos_emb = Parameter(torch.Tensor(hidden_channels, height, width))
    torch.nn.init.normal_(self.pos_emb, mean=0., std=pos_emb_init)

    self.proj_in = Conv2d(in_channels=in_channels,
                          out_channels=hidden_channels,
                          kernel_size=3, padding=1)
    self.blocks = ModuleList([
        ConvAttnBlock_Imagenet64(channels=hidden_channels,
                                 aux_channels=aux_channels,
                                 attn_heads=attn_heads,
                                 pdrop=pdrop,
                                 attn_version=attn_version)
        for _ in range(blocks)
    ])
    # additional nonlinearity added in compared to CIFAR
    self.nonlinearity = nonlinearity
    self.proj_out = Conv2d(in_channels=hidden_channels * 2,
                           out_channels=out_channels,
                           kernel_size=3, padding=1,
                           init_scale=output_init_scale)

def __init__(self, num_classes, weight_bit_width, act_bit_width, in_bit_width,
             in_channels, out_features, in_features=(28, 28)):
    super(FC, self).__init__()

    self.features = ModuleList()
    self.features.append(
        QuantIdentity(act_quant=CommonActQuant, bit_width=in_bit_width))
    self.features.append(Dropout(p=DROPOUT))
    in_features = reduce(mul, in_features)
    for out_features in out_features:
        self.features.append(
            QuantLinear(in_features=in_features,
                        out_features=out_features,
                        bias=False,
                        weight_bit_width=weight_bit_width,
                        weight_quant=CommonWeightQuant))
        in_features = out_features
        self.features.append(BatchNorm1d(num_features=in_features))
        self.features.append(
            QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))
        self.features.append(Dropout(p=DROPOUT))
    self.features.append(
        QuantLinear(in_features=in_features,
                    out_features=num_classes,
                    bias=False,
                    weight_bit_width=weight_bit_width,
                    weight_quant=CommonWeightQuant))
    self.features.append(TensorNorm())

    self.name = 'FC'

    for m in self.modules():
        if isinstance(m, QuantLinear):
            torch.nn.init.uniform_(m.weight.data, -1, 1)

def __init__(self, in_channels, out_channels, hids=[16], acts=['relu'],
             tperc=0.45, dropout=0.5, weight_decay=5e-4, lr=0.01,
             use_bias=False):
    super().__init__()

    layers = ModuleList()
    paras = []

    # use ModuleList to create layers with different size
    inc = in_channels
    for hid, act in zip(hids, acts):
        layer = TrimmedConvolution(inc,
                                   hid,
                                   activation=act,
                                   use_bias=use_bias,
                                   tperc=tperc)
        layers.append(layer)
        paras.append(
            dict(params=layer.parameters(), weight_decay=weight_decay))
        inc = hid

    layer = TrimmedConvolution(inc, out_channels, use_bias=use_bias, tperc=tperc)
    layers.append(layer)
    # do not use weight_decay in the final layer
    paras.append(dict(params=layer.parameters(), weight_decay=0.))

    self.compile(loss=torch.nn.CrossEntropyLoss(),
                 optimizer=optim.Adam(paras, lr=lr),
                 metrics=[Accuracy()])
    self.dropout = Dropout(dropout)
    self.layers = layers

def __init__(self, models: List[MultilingualTransformerModel],
             task: MultilingualTranslationTask, cfg: DictConfig,
             sp_models: Dict[str, SentencePieceProcessor]):
    super().__init__()

    self.sp_models = sp_models
    self.models = ModuleList(models)
    self.task = task
    self.cfg = cfg
    self.dicts: Dict[str, Dictionary] = task.dicts
    self.langs = task.langs

    for model in self.models:
        model.prepare_for_inference_(self.cfg)

    self.max_positions = utils.resolve_max_positions(
        self.task.max_positions(),
        *[model.max_positions() for model in self.models])

    self.register_buffer("_float_tensor", torch.tensor([0], dtype=torch.float))

def __init__(self, writer, num_hidden_layers=1):
    super(CosineNet, self).__init__()
    input_features = 1
    hidden_output_features = 10
    final_output_features = 1
    self.writer = writer

    layers = ModuleList()
    for i in range(num_hidden_layers):
        if i == 0:
            layers.append(
                torch.nn.Linear(input_features, hidden_output_features))
        else:
            layers.append(
                torch.nn.Linear(hidden_output_features, hidden_output_features))
        layers.append(torch.nn.ReLU())
    final_layer = torch.nn.Linear(hidden_output_features, final_output_features)
    layers.append(final_layer)

    self.model = torch.nn.Sequential(*layers)
    self.loss_func = torch.nn.MSELoss()

def __init__(self, vocab_size: int, num_encoder_layer: int, hidden_size: int,
             num_head: int, feedward: int, dropout: float, device: str):
    """
    :param vocab_size: size of the source-language vocabulary
    :param num_encoder_layer: number of encoder layers
    :param hidden_size: hidden size / embedding size of a single token
    :param num_head: number of attention heads
    :param feedward: hidden dimension of the feed-forward sublayer
    """
    super().__init__()
    self.Encoder_layers = ModuleList()
    for i in range(num_encoder_layer):
        self.Encoder_layers.append(
            EncoderLayer(hidden_size, num_head, feedward, dropout))
        # self.Encoder_layers.append(TransformerEncoderLayer(d_model=hidden_size, nhead=8, dim_feedforward=2048))
    self.Embedding = Embedding(vocab_size, hidden_size, padding_idx=0)
    self.Positional_Encoding = Positional_Encoding(hidden_size, 512, device)
    self.d_model = hidden_size

def init_stacked_analog_lstm(
        num_layers: int,
        layer: Type,
        first_layer_args: Any,
        other_layer_args: Any
) -> ModuleList:
    """Construct a list of LSTMLayers over which to iterate.

    Args:
        num_layers: number of serially connected LSTM layers
        layer: LSTM layer type (e.g. AnalogLSTMLayer)
        first_layer_args: LSTMCell type, input_size, hidden_size, rpu_config, etc.
        other_layer_args: LSTMCell type, hidden_size, hidden_size, rpu_config, etc.

    Returns:
        torch.nn.ModuleList, which is similar to a regular Python list, but
        where torch.nn.Module methods can be applied
    """
    layers = [layer(*first_layer_args)] \
        + [layer(*other_layer_args) for _ in range(num_layers - 1)]
    return ModuleList(layers)

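# Illustrative call (the class names are placeholders, not the library's
# API): the first layer maps input_size -> hidden_size, the remaining
# num_layers - 1 layers map hidden_size -> hidden_size, and the result is
# a ModuleList that a stacked-LSTM forward loop can iterate over.
stacked = init_stacked_analog_lstm(
    num_layers=3,
    layer=MyLSTMLayer,                       # hypothetical layer class
    first_layer_args=(MyLSTMCell, 64, 128),  # cell type, input_size, hidden_size
    other_layer_args=(MyLSTMCell, 128, 128))
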