def __init__(self, size, statistics_group_size=1, name=None,
             data_layout='data_parallel'):
    super().__init__()
    FcBnRelu.global_count += 1
    self.instance = 0
    self.name = (name if name
                 else 'fcbnrelu{0}'.format(FcBnRelu.global_count))
    self.data_layout = data_layout
    self.fc = lbann.modules.FullyConnectedModule(
        size,
        bias=False,
        name=self.name + '_fc',
        data_layout=self.data_layout)

    # Weights for batchnorm
    scalebias_vals = [1.0] * size + [0.0] * size
    self.bn_weights = [
        lbann.Weights(
            name='{0}_bn_running_mean'.format(self.name),
            initializer=lbann.ConstantInitializer(value=0.0)),
        lbann.Weights(
            name='{0}_bn_running_var'.format(self.name),
            initializer=lbann.ConstantInitializer(value=1.0)),
        lbann.Weights(
            name='{0}_bn_scalebias'.format(self.name),
            initializer=lbann.ValueInitializer(
                values=' '.join([str(x) for x in scalebias_vals]))),
    ]
def __init__(self, statistics_group_size=1, name=None,
             data_layout='data_parallel'):
    super().__init__()
    BatchNormModule.global_count += 1
    self.instance = 0
    self.statistics_group_size = statistics_group_size
    self.name = (name if name
                 else 'bnmodule{0}'.format(BatchNormModule.global_count))
    self.data_layout = data_layout

    # Initialize weights
    self.scale = lbann.Weights(
        initializer=lbann.ConstantInitializer(value=1.0),
        name=self.name + '_scale')
    self.bias = lbann.Weights(
        initializer=lbann.ConstantInitializer(value=0.0),
        name=self.name + '_bias')
    self.running_mean = lbann.Weights(
        initializer=lbann.ConstantInitializer(value=0.0),
        name=self.name + '_running_mean')
    self.running_variance = lbann.Weights(
        initializer=lbann.ConstantInitializer(value=1.0),
        name=self.name + '_running_variance')
def BondEncoder(edge_feature_columns, EDGE_EMBEDDING_DIM):
    """Embeds the edge features into a vector.

    Args:
        edge_feature_columns (list(Layer)): A list of layers with edge
            features of shape (NUM_EDGES)
        EDGE_EMBEDDING_DIM (int): The embedding dimensionality of the edge
            feature vector

    Returns:
        (Layer): A layer containing the embedded edge feature matrix of
            shape (NUM_EDGES, EDGE_EMBEDDING_DIM)
    """
    # Courtesy of OGB
    bond_feature_dims = [5, 6, 2]

    _fan_in = bond_feature_dims[0]
    _fan_out = EDGE_EMBEDDING_DIM
    _embedding_weights = lbann.Weights(
        initializer=_xavier_uniform_init(_fan_in, _fan_out),
        name="bond_encoder_weights_{}".format(0))

    temp = lbann.Embedding(edge_feature_columns[0],
                           num_embeddings=bond_feature_dims[0],
                           embedding_dim=EDGE_EMBEDDING_DIM,
                           weights=_embedding_weights,
                           name="Bond_Embedding_0")
    for i in range(1, 3):
        _fan_in = bond_feature_dims[i]
        _fan_out = EDGE_EMBEDDING_DIM
        _embedding_weights = lbann.Weights(
            initializer=_xavier_uniform_init(_fan_in, _fan_out),
            name="bond_encoder_weights_{}".format(i))
        _temp2 = lbann.Embedding(edge_feature_columns[i],
                                 num_embeddings=bond_feature_dims[i],
                                 embedding_dim=EDGE_EMBEDDING_DIM,
                                 weights=_embedding_weights,
                                 name="Bond_Embedding_{}".format(i))
        temp = lbann.Sum(temp, _temp2)
    return temp
def __init__(self, mcr, name=None):
    self.instance = 0
    self.name = (name if name else 'ExaGAN{0}'.format(CosmoGAN.global_count))

    ## Gathering the CNN modules into variables
    convbnrelu = lbann.models.resnet.ConvBNRelu
    fc = lbann.modules.FullyConnectedModule
    conv = lbann.modules.Convolution2dModule
    #bn_stats_grp_sz = 0 #0 global, 1 local
    bn_stats_grp_sz = -1 #0 global, 1 local

    self.datascale = 4.0
    self.linear_scaler = 1000.0

    self.inits = {'dense': lbann.NormalInitializer(mean=0, standard_deviation=0.02),
                  'conv': lbann.NormalInitializer(mean=0, standard_deviation=0.02), #should be truncated Normal
                  'convT': lbann.NormalInitializer(mean=0, standard_deviation=0.02)}

    #########################
    ##### Discriminator
    d_neurons = [64, 128, 256, 512]
    d_kernel_size, d_stride, d_padding = 5, 2, 2

    ### Implementing convolution, bnorm using convbnrelu
    ##self, out_channels, kernel_size, stride, padding, bn_zero_init, bn_statistics_group_size, relu, name
    self.d1_conv = [convbnrelu(layer, kernel_size=d_kernel_size, stride=d_stride,
                               padding=d_padding, bn_zero_init=False,
                               bn_statistics_group_size=bn_stats_grp_sz,
                               relu=False, name=self.name+'_disc1_conv'+str(i))
                    for i, layer in enumerate(d_neurons)]
    ## Trying without convbnrelu
    # self.d1_conv = [conv(layer, d_kernel_size, stride=d_stride, padding=d_padding, transpose=False,
    #                      bias=False, weights=[lbann.Weights(initializer=self.inits['conv'])],
    #                      name=self.name+'_disc1_conv'+str(i))
    #                 for i, layer in enumerate(d_neurons)]

    ### Fully connected layer
    ##self, size, bias=True, transpose=False, weights=[], activation=None, name=None, data_layout='data_parallel', parallel_strategy={}
    self.d1_fc = fc(1, name=self.name+'_disc1_fc',
                    weights=[lbann.Weights(initializer=self.inits['dense'])])

    #stacked_discriminator, this will be frozen, no optimizer,
    #layer has to be named for callback
    self.d2_conv = [convbnrelu(layer, d_kernel_size, d_stride, d_padding, False,
                               bn_stats_grp_sz, False,
                               name=self.name+'_disc2_conv'+str(i))
                    for i, layer in enumerate(d_neurons)]
    # self.d2_conv = [conv(layer, d_kernel_size, stride=d_stride, padding=d_padding, transpose=False,
    #                      bias=False, weights=[lbann.Weights(initializer=self.inits['conv'])],
    #                      name=self.name+'_disc2_conv'+str(i))
    #                 for i, layer in enumerate(d_neurons)]
    self.d2_fc = fc(1, name=self.name+'_disc2_fc',
                    weights=[lbann.Weights(initializer=self.inits['dense'])])

    #########################
    ##### Generator
    g_neurons = [256, 128, 64]
    g_kernel_size, g_stride, g_padding = 5, 2, 2

    ### Transpose convolution
    ##(self, num_dims, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, weights=[], activation=None, name=None, transpose=False, parallel_strategy={})
    self.g_convT = [conv(layer, g_kernel_size, stride=g_stride, padding=g_padding,
                         transpose=True,
                         weights=[lbann.Weights(initializer=self.inits['convT'])])
                    for i, layer in enumerate(g_neurons)]

    ### Fully connected
    fc_size = 524288 ### (8 * 8 * 2 * 256)
    self.g_fc1 = fc(fc_size, name=self.name+'_gen_fc1',
                    weights=[lbann.Weights(initializer=self.inits['dense'])])

    ### Final conv transpose
    self.g_convT3 = conv(1, g_kernel_size, stride=g_stride, padding=g_padding,
                         activation=lbann.Tanh, name='gen_img', transpose=True,
                         weights=[lbann.Weights(initializer=self.inits['convT'])])
def __init__(self, size, bias=True, weights=[], name=None,
             data_layout='data_parallel'):
    """Initialize LSTM cell.

    Args:
        size (int): Size of output tensor.
        bias (bool): Whether to apply biases after linearity.
        weights (`Weights` or iterator of `Weights`): Weights in
            fully-connected layer. There are at most two - a matrix
            ((4*size) x (input_size+size) dimensions) and a bias
            (4*size entries). If weights are not provided, the matrix
            and bias will be initialized in a similar manner as
            PyTorch (uniform random values from
            [-1/sqrt(size), 1/sqrt(size)]).
        name (str): Default name is in the form 'lstmcell<index>'.
        data_layout (str): Data layout.

    """
    super().__init__()
    LSTMCell.global_count += 1
    self.step = 0
    self.size = size
    self.name = (name if name
                 else 'lstmcell{0}'.format(LSTMCell.global_count))
    self.data_layout = data_layout

    # Initial state
    self.last_output = lbann.Constant(value=0.0, num_neurons=str(size),
                                      name=self.name + '_init_output',
                                      data_layout=self.data_layout)
    self.last_cell = lbann.Constant(value=0.0, num_neurons=str(size),
                                    name=self.name + '_init_cell',
                                    data_layout=self.data_layout)

    # Weights
    self.weights = list(make_iterable(weights))
    if len(self.weights) > 2:
        raise ValueError('`LSTMCell` has at most two weights, '
                         'but got {0}'.format(len(self.weights)))
    if len(self.weights) == 0:
        self.weights.append(
            lbann.Weights(initializer=lbann.UniformInitializer(min=-1/sqrt(self.size),
                                                               max=1/sqrt(self.size)),
                          name=self.name+'_matrix'))
    if len(self.weights) == 1:
        self.weights.append(
            lbann.Weights(initializer=lbann.UniformInitializer(min=-1/sqrt(self.size),
                                                               max=1/sqrt(self.size)),
                          name=self.name+'_bias'))

    # Linearity
    self.fc = FullyConnectedModule(4*size,
                                   bias=bias,
                                   weights=self.weights,
                                   name=self.name + '_fc',
                                   data_layout=self.data_layout)
def __init__(self, size, bias=False, weights=[], activation=None,
             transpose=False, name=None, parallel_strategy={}):
    """Initialize channelwise fully-connected module.

    Args:
        size (int or list): Dimension of the output tensor.
        bias (bool): Whether to apply bias after linearity.
        transpose (bool): Whether to apply transpose of weights matrix.
        weights (`Weights` or iterator of `Weights`): Weights in
            fully-connected layer. There are at most two: the matrix
            and the bias. If weights are not provided, the matrix
            will be initialized with He normal initialization and
            the bias with zeros.
        activation (type): Layer class for activation function.
        name (str): Default name is in the form 'channelwisefc<index>'.
        parallel_strategy (dict): Data partitioning scheme.

    """
    super().__init__()
    ChannelwiseFullyConnectedModule.global_count += 1
    self.instance = 0
    self.size = size
    self.bias = bias
    self.transpose = transpose
    self.parallel_strategy = parallel_strategy
    self.name = (name if name
                 else 'channelwisefc{0}'.format(
                     ChannelwiseFullyConnectedModule.global_count))
    self.data_layout = 'data_parallel'

    self.weights = list(make_iterable(weights))
    if len(self.weights) > 2:
        raise ValueError('`ChannelwiseFullyConnectedModule` has '
                         'at most two weights, '
                         'but got {0}'.format(len(self.weights)))
    if len(self.weights) == 0:
        self.weights.append(
            lbann.Weights(initializer=lbann.HeNormalInitializer(),
                          name=self.name + '_matrix'))
    if self.bias and len(self.weights) == 1:
        self.weights.append(
            lbann.Weights(initializer=lbann.ConstantInitializer(value=0.0),
                          name=self.name + '_bias'))

    self.activation = None
    if activation:
        if isinstance(activation, type):
            self.activation = activation
        else:
            self.activation = type(activation)
        if not issubclass(self.activation, lbann.Layer):
            raise ValueError('activation must be a layer')
def __init__(self, mcr, name=None):
    self.instance = 0
    self.name = (name if name else 'ExaGAN{0}'.format(CosmoGAN.global_count))

    ## Gathering the CNN modules into variables
    convbnrelu = lbann.models.resnet.ConvBNRelu
    fc = lbann.modules.FullyConnectedModule
    conv = lbann.modules.Convolution2dModule
    #bn_stats_grp_sz = 0 #0 global, 1 local
    bn_stats_grp_sz = -1 #0 global, 1 local

    self.datascale = 4.0
    self.linear_scaler = 1000.0

    self.inits = {
        'dense': lbann.NormalInitializer(mean=0, standard_deviation=0.02),
        'conv': lbann.NormalInitializer(
            mean=0, standard_deviation=0.02), #should be truncated Normal
        'convT': lbann.NormalInitializer(mean=0, standard_deviation=0.02)
    }

    #########################
    ##### Generator
    g_neurons = [256, 128, 64]
    g_kernel_size, g_stride, g_padding = 5, 2, 2

    ### Transpose convolution
    ##(self, num_dims, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, weights=[], activation=None, name=None, transpose=False, parallel_strategy={})
    self.g_convT = [
        conv(layer,
             g_kernel_size,
             stride=g_stride,
             padding=g_padding,
             transpose=True,
             weights=[lbann.Weights(initializer=self.inits['convT'])])
        for i, layer in enumerate(g_neurons)
    ]

    ### Fully connected
    fc_size = 32768  ### (8 * 8 * 2 * 256)
    self.g_fc1 = fc(
        fc_size,
        name=self.name + '_gen_fc1',
        weights=[lbann.Weights(initializer=self.inits['dense'])])

    ### Final conv transpose
    self.g_convT3 = conv(
        1,
        g_kernel_size,
        stride=g_stride,
        padding=g_padding,
        activation=lbann.Tanh,
        name='gen_img',
        transpose=True,
        weights=[lbann.Weights(initializer=self.inits['convT'])])
def __init__(self, size, bias=True, weights=[], activation=None,
             name=None, data_layout='data_parallel'):
    """Initialize fully-connected module.

    Args:
        size (int): Size of output tensor.
        activation (type): Layer class for activation function.
        bias (bool): Whether to apply bias after linearity.
        weights (`Weights` or iterator of `Weights`): Weights in
            fully-connected layer. There are at most two: the
            matrix and the bias. If weights are not provided, the
            matrix will be initialized with He normal
            initialization and the bias with zeros.
        name (str): Default name is in the form 'fcmodule<index>'.
        data_layout (str): Data layout.

    """
    super().__init__()
    FullyConnectedModule.global_count += 1
    self.instance = 0
    self.size = size
    self.bias = bias
    self.name = (name if name
                 else 'fcmodule{0}'.format(FullyConnectedModule.global_count))
    self.data_layout = data_layout

    # Initialize weights
    # Note: If weights are not provided, matrix weights are
    # initialized with He normal scheme and bias weights are
    # initialized with zeros.
    self.weights = list(make_iterable(weights))
    if len(self.weights) > 2:
        raise ValueError('`FullyConnectedModule` has '
                         'at most two weights, '
                         'but got {0}'.format(len(self.weights)))
    if len(self.weights) == 0:
        self.weights.append(
            lbann.Weights(initializer=lbann.HeNormalInitializer(),
                          name=self.name+'_matrix'))
    if len(self.weights) == 1:
        self.weights.append(
            lbann.Weights(initializer=lbann.ConstantInitializer(value=0.0),
                          name=self.name+'_bias'))

    # Initialize activation layer
    self.activation = None
    if activation:
        if isinstance(activation, type):
            self.activation = activation
        else:
            self.activation = type(activation)
        if not issubclass(self.activation, lbann.Layer):
            raise ValueError('activation must be a layer')
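# Usage sketch (illustrative, not from the original source): LBANN modules are
# callable; each call builds a new FullyConnected layer that shares this
# module's weights. `x` stands in for any existing lbann.Layer in the graph.
fc_hidden = FullyConnectedModule(256, activation=lbann.Relu, name='hidden0')
y1 = fc_hidden(x)  # builds a 256-wide FC layer followed by Relu
y2 = fc_hidden(x)  # a second call reuses the same matrix and bias weights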
def __init__(self, input_dim, output_dim, hidden_dims=[]):
    super().__init__()
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.hidden_dims = utils.make_iterable(hidden_dims)
    self.weights = [
        lbann.Weights(initializer=lbann.HeNormalInitializer())
        for _ in range(len(self.hidden_dims))
    ]
    self.weights.append(
        lbann.Weights(initializer=lbann.HeNormalInitializer()))
def __init__(self, out_channels, kernel_size, stride, padding,
             use_bn, bn_zero_init, bn_statistics_group_size,
             activation, parallel_strategy, name, conv_weights):
    """Initialize ConvBNRelu module.

    Args:
        out_channels (int): Number of output channels, i.e. number
            of convolution filters.
        kernel_size (int): Size of convolution kernel.
        stride (int): Convolution stride.
        padding (int): Convolution padding.
        use_bn (bool): Whether or not batch normalization layers are
            used.
        bn_zero_init (bool): Zero-initialize batch normalization
            scale.
        bn_statistics_group_size (int): Aggregation size for batch
            normalization statistics.
        activation (lbann.Layer): The activation function.
        parallel_strategy (dict): Data partitioning scheme.
        name (str): Module name.
        conv_weights (lbann.Weights): Pre-defined weights.

    """
    super().__init__()
    self.name = name
    self.instance = 0
    self.stride = stride
    self.bn_statistics_group_size = bn_statistics_group_size
    self.activation = activation
    self.use_bn = use_bn
    self.conv_weights = conv_weights
    self.ps = parallel_strategy

    # Initialize convolution
    self.conv = lbann.modules.Convolution3dModule(
        out_channels,
        kernel_size,
        stride=1,
        padding=padding,
        bias=False,
        parallel_strategy=self.ps,
        weights=self.conv_weights,
        name=self.name + '_conv')

    # Initialize batch normalization
    if self.use_bn:
        bn_scale_init = 0.0 if bn_zero_init else 1.0
        bn_scale = lbann.Weights(
            initializer=lbann.ConstantInitializer(value=bn_scale_init),
            name=self.name + '_bn_scale')
        bn_bias = lbann.Weights(
            initializer=lbann.ConstantInitializer(value=0.0),
            name=self.name + '_bn_bias')
        self.bn_weights = [bn_scale, bn_bias]
def forward(self, image, dims, max_r):
    """Compute radial profile.

    Args:
        image (lbann.Layer): Image
        dims (tuple of int): Image dimensions (dim 0 corresponds to
            channel)
        max_r (int): Maximum radial distance. Pixels outside this
            distance are ignored.

    Returns:
        Layer: num_channels x max_r radial profile

    """

    # Bin spatial positions
    r, r_counts = self._find_radial_bins(dims[1:], max_r)

    # Reciprocal of bin counts
    # Note: If a count is 0, its reciprocal is 0.
    r_counts_recip = [0 if c == 0 else 1 / c for c in r_counts]

    # Get scatter indices and scaling factors
    # Note: Independent binning for each channel (dim 0)
    tile_dims = [dims[0]] + [1] * r.ndim
    inds_vals = np.tile(r, tile_dims)
    inds_vals += np.arange(0, dims[0] * max_r, max_r).reshape(tile_dims)
    inds_vals[:, r >= max_r] = -1
    inds_vals = inds_vals.flatten()
    scales_vals = r_counts_recip * dims[0]

    # Construct LBANN layer graph
    image = lbann.Reshape(image, dims=str_list([np.prod(dims)]))
    inds = lbann.WeightsLayer(
        weights=lbann.Weights(
            lbann.ValueInitializer(values=str_list(inds_vals)),
            optimizer=lbann.NoOptimizer(),
        ),
        dims=str_list([len(inds_vals)]),
    )
    r_sums = lbann.Scatter(image, inds, dims=str_list([dims[0] * max_r]))
    scales = lbann.WeightsLayer(
        weights=lbann.Weights(
            lbann.ValueInitializer(values=str_list(scales_vals)),
            optimizer=lbann.NoOptimizer(),
        ),
        dims=str_list([len(scales_vals)]),
    )
    r_means = lbann.Multiply(scales, r_sums)
    return lbann.Reshape(r_means, dims=str_list([dims[0], max_r]))
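# Illustrative numpy sketch of the radial-profile computation above, assuming
# integer radial bins measured from the image center (the actual binning is done
# by self._find_radial_bins, which is not shown here).
import numpy as np

H, W, max_r = 8, 8, 4
img = np.random.rand(H, W)                      # single-channel example image
yy, xx = np.indices((H, W))
r = np.rint(np.hypot(yy - (H - 1) / 2, xx - (W - 1) / 2)).astype(int)
valid = r < max_r                               # pixels beyond max_r are ignored
sums = np.bincount(r[valid], weights=img[valid], minlength=max_r)
counts = np.bincount(r[valid], minlength=max_r)
profile = sums / np.maximum(counts, 1)          # mean intensity per radial bin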
def __init__(self, embed_dim, num_heads, branches, d_kv=None, name=None):
    super().__init__()
    MultiheadAttention.global_count += 1
    self.instance = 0
    assert embed_dim % num_heads == 0, 'embed_dim must be divisible by num_heads'
    self.embed_dim = embed_dim
    self.num_heads = num_heads
    self.head_dim = embed_dim // num_heads

    if d_kv is None:
        self.inner_dim = embed_dim
        self.head_dim = embed_dim // num_heads
    else:
        self.inner_dim = d_kv * num_heads
        self.head_dim = d_kv

    if branches == 0:
        self.ENABLE_SUBGRAPH = False
        self.BRANCHES = 0
    else:
        self.ENABLE_SUBGRAPH = True
        self.BRANCHES = branches

    # Module name
    self.name = name
    if not self.name:
        self.name = f'multiheadattention{MultiheadAttention.global_count}'

    # Weights for fully-connected layers
    self.query_weights = [
        lbann.Weights(initializer=lbann.GlorotNormalInitializer(),
                      name=f'{self.name}_query_matrix'),
        lbann.Weights(initializer=lbann.ConstantInitializer(value=0),
                      name=f'{self.name}_query_bias'),
    ]
    self.key_weights = [
        lbann.Weights(initializer=lbann.GlorotNormalInitializer(),
                      name=f'{self.name}_key_matrix'),
        lbann.Weights(initializer=lbann.ConstantInitializer(value=0),
                      name=f'{self.name}_key_bias'),
    ]
    self.value_weights = [
        lbann.Weights(initializer=lbann.GlorotNormalInitializer(),
                      name=f'{self.name}_value_matrix'),
        lbann.Weights(initializer=lbann.ConstantInitializer(value=0),
                      name=f'{self.name}_value_bias'),
    ]

    # Channelwise FC in subgraph
    self.output_weights = []
    for head in range(branches):
        self.output_weights.append([
            lbann.Weights(initializer=lbann.GlorotNormalInitializer(),
                          name=f'{self.name}_head{head}_output_matrix'),
            lbann.Weights(initializer=lbann.ConstantInitializer(value=0),
                          name=f'{self.name}_head{head}_output_bias'),
        ])
def __init__(
    self,
    num_vertices,
    motif_size,
    embed_dim,
    learn_rate,
):
    super().__init__()
    self.num_vertices = num_vertices
    self.embed_dim = embed_dim
    self.learn_rate = learn_rate

    # Initialize weights
    # Note: The discriminator's probability estimate is
    #   D = 1 - exp(-sum_j(prod_i(d_ij)))
    # Treating the embeddings as i.i.d. random variables:
    #   D = 1 - exp( -embed_dim * d^motif_size )
    #   log(d) = log( -log(1-D) / embed_dim ) / motif_size
    # We initialize the embeddings in log-space so that the
    # discriminator's initial probability estimates have mean 0.5.
    mean = math.log( -math.log(1-0.5) / embed_dim ) / motif_size
    radius = math.log( -math.log(1-0.75) / embed_dim ) / motif_size - mean
    self.log_embedding_weights = lbann.Weights(
        initializer=lbann.UniformInitializer(
            min=mean-radius, max=mean+radius),
        name='discriminator_log_embeddings',
    )
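# Worked example of the initialization math in the comment above, assuming
# embed_dim=16 and motif_size=4 (values chosen only for illustration).
import math

embed_dim, motif_size = 16, 4
mean = math.log(-math.log(1 - 0.5) / embed_dim) / motif_size
radius = math.log(-math.log(1 - 0.75) / embed_dim) / motif_size - mean
# Log-embeddings drawn from Uniform(mean-radius, mean+radius) make the
# discriminator's initial probability estimates center near D = 0.5.
print(mean, radius)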
def _positional_encoding(self, sequence_length):
    """Positional encodings corresponding to a sequence length.

    PE(pos,2*i)   = sin( pos / 10000**(2*i/hidden_size) )
    PE(pos,2*i+1) = cos( pos / 10000**(2*i/hidden_size) )

    Encodings are memoized.

    """

    # Construct positional encoding if not in cache
    if sequence_length not in self._positional_encoding_cache:
        vals = []
        for pos in range(sequence_length):
            for i in range((self.hidden_size + 1) // 2):
                x = pos / 10000**(2 * i / self.hidden_size)
                vals.append(math.sin(x))
                vals.append(math.cos(x))
            if self.hidden_size % 2 != 0:
                vals.pop()
        weights = lbann.Weights(
            initializer=lbann.ValueInitializer(values=str_list(vals)),
            optimizer=None,
            name=f'{self.name}_positional{sequence_length}_weights',
        )
        self._positional_encoding_cache[sequence_length] = lbann.WeightsLayer(
            dims=str_list([sequence_length, self.hidden_size]),
            weights=weights,
            name=f'{self.name}_positional{sequence_length}',
        )

    # Return cached positional encoding
    return self._positional_encoding_cache[sequence_length]
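# Standalone numpy sketch of the sinusoidal encoding built above (an even
# hidden_size is assumed here; the module drops the trailing value per
# position when hidden_size is odd).
import math
import numpy as np

def positional_encoding(sequence_length, hidden_size):
    pe = np.zeros((sequence_length, hidden_size))
    for pos in range(sequence_length):
        for i in range(hidden_size // 2):
            x = pos / 10000 ** (2 * i / hidden_size)
            pe[pos, 2 * i] = math.sin(x)      # even columns: sine
            pe[pos, 2 * i + 1] = math.cos(x)  # odd columns: cosine
    return pe

print(positional_encoding(4, 8).shape)  # (4, 8)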
def Cumsum(x, dims, axis=0):
    global _cumsum_cache

    if len(dims) != 2:
        raise RuntimeError("dims > 2 not tested/supported for cumsum")
    if (axis < 0) or (axis > 1):
        raise RuntimeError("Unsupported cumsum axis: {}".format(axis))

    shape = (dims[axis], dims[axis])
    if shape not in _cumsum_cache:
        tril_ones = np.tril(np.full(shape, 1, dtype=int), k=0)
        tril_ones = lbann.Weights(
            initializer=lbann.ValueInitializer(
                values=str_list(np.nditer(tril_ones, order="C")),
            ),
            optimizer=lbann.NoOptimizer(),
        )
        tril_ones = lbann.WeightsLayer(dims=str_list(shape), weights=tril_ones)
        _cumsum_cache[shape] = tril_ones

    # Apply cumsum
    tril_ones = _cumsum_cache[shape]
    if axis == 0:
        x = lbann.MatMul(tril_ones, x)
        return x
    if axis == 1:
        x = lbann.MatMul(x, tril_ones, transpose_b=True)
        return x
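# Numpy check of the matmul-based cumsum used above: multiplying by a
# lower-triangular matrix of ones accumulates along the chosen axis.
import numpy as np

x = np.arange(12, dtype=float).reshape(4, 3)
np.testing.assert_allclose(np.tril(np.ones((4, 4))) @ x,
                           np.cumsum(x, axis=0))   # axis=0 case
np.testing.assert_allclose(x @ np.tril(np.ones((3, 3))).T,
                           np.cumsum(x, axis=1))   # axis=1 case (transpose_b)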
def Permute(x, dims, axes=None, name="", return_dims=False):
    global _permute_cache

    key = (dims, axes)
    size = np.prod(dims)
    if key not in _permute_cache:
        # Construct gather indices
        inds = np.arange(size).reshape(dims, order="C").transpose(axes)
        inds = lbann.Weights(
            initializer=lbann.ValueInitializer(
                values=str_list(np.nditer(inds, order="C")),
            ),
            optimizer=lbann.NoOptimizer(),
        )
        inds = lbann.WeightsLayer(dims=str_list([size]), weights=inds)
        _permute_cache[key] = inds

    # Apply transpose with gather
    inds = _permute_cache[key]
    if axes is None:
        new_dims = dims[::-1]
    else:
        new_dims = np.array(dims)[list(axes)]
    x = lbann.Reshape(x, dims=str_list([size]))
    y = lbann.Gather(x, inds)
    y = lbann.Reshape(y, dims=str_list(list(new_dims)), name=name)

    if return_dims:
        return y, tuple(new_dims)
    return y
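# Numpy check of the gather-based transpose used above: gathering a flattened
# tensor with the flattened indices of the transposed index grid reproduces
# numpy's transpose.
import numpy as np

dims, axes = (2, 3, 4), (2, 0, 1)
x = np.arange(np.prod(dims)).reshape(dims)
inds = np.arange(x.size).reshape(dims, order="C").transpose(axes).flatten()
y = x.flatten()[inds].reshape(np.array(dims)[list(axes)])
np.testing.assert_array_equal(y, x.transpose(axes))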
def __init__(self, output_channels, num_layers=1, name=None,
             data_layout='data_parallel'):
    """Initialize GatedGraph layer.

    Args:
        output_channels (int): The output size of the node features
        num_layers (int): Number of passes through the GRU (default: 1)
        name (str): Name of the layers and prefix to use for the layers.
        data_layout (str): Data layout (default: data parallel)

    """
    super().__init__()

    ## Add Name for the components for the layer
    GatedGraphConv.global_count += 1
    self.name = (name if name
                 else 'GatedGraphConv_{}'.format(GatedGraphConv.global_count))

    ## Add variables
    self.output_channels = output_channels
    self.rnn = lbann.modules.GRU(output_channels)
    self.num_layers = num_layers
    self.data_layout = data_layout

    self.weights = []
    for i in range(num_layers):
        weight_init = lbann.Weights(initializer=lbann.UniformInitializer(
            min=-1 / (math.sqrt(output_channels)),
            max=1 / (math.sqrt(output_channels))))
        weight_layer = lbann.WeightsLayer(
            dims=str_list([output_channels, output_channels]),
            weights=weight_init,
            name=self.name + '_' + str(i) + '_weight',
            data_layout=self.data_layout)
        self.weights.append(weight_layer)
def __init__(self, input_feature_dims, dictionary_size, embedding_size,
             ignore_label, name=None):
    """Initialize Molecular VAE.

    Args:
        input_feature_dims (int): analogous to sequence length.
        dictionary_size (int): vocabulary size
        embedding_size (int): embedding size
        ignore_label (int): padding index
        name (str, optional): Module name
            (default: 'molvae_module<index>').

    """
    MolVAE.global_count += 1
    self.instance = 0
    self.name = (name if name
                 else 'molvae_module{0}'.format(MolVAE.global_count))

    self.input_feature_dims = input_feature_dims
    self.embedding_size = embedding_size
    self.dictionary_size = dictionary_size
    self.label_to_ignore = ignore_label
    self.datatype = lbann.DataType.FLOAT
    self.weights_datatype = lbann.DataType.FLOAT

    fc = lbann.modules.FullyConnectedModule
    gru = GRUModule

    #Encoder
    self.encoder_rnn = gru(
        hidden_size=256,
        name=self.name+'_encoder_rnn',
        datatype=self.datatype,
        weights_datatype=self.weights_datatype,
    )
    self.q_mu = fc(128, name=self.name+'_encoder_qmu')
    self.q_logvar = fc(128, name=self.name+'_encoder_qlogvar')
    for w in self.q_mu.weights + self.q_logvar.weights:
        w.datatype = self.weights_datatype

    #Decoder
    self.decoder_rnn = gru(
        hidden_size=512,
        num_layers=3,
        name=self.name+'_decoder_rnn',
        datatype=self.datatype,
        weights_datatype=self.weights_datatype,
    )
    self.decoder_lat = fc(512, name=self.name+'_decoder_lat')
    self.decoder_fc = fc(self.dictionary_size, name=self.name+'_decoder_fc')
    for w in self.decoder_lat.weights + self.decoder_fc.weights:
        w.datatype = self.weights_datatype
    self.decoder_fc.weights[0].initializer = lbann.NormalInitializer(
        mean=0, standard_deviation=1/math.sqrt(512))

    #shared encoder/decoder weights
    self.emb_weights = lbann.Weights(
        initializer=lbann.NormalInitializer(mean=0, standard_deviation=1),
        name='emb_matrix',
        datatype=self.weights_datatype,
    )
def _subsequent_mask(self, size):
    """Attention mask to prevent attending to subsequent positions.

    The (i,j) entry is -1e9 if i<j and is 0 otherwise. Masks are
    memoized.

    """

    # Construct mask if not in cache
    if size not in self._subsequent_mask_cache:
        vals = np.triu(np.full((size, size), -1e9), k=1)
        weights = lbann.Weights(
            initializer=lbann.ValueInitializer(
                values=str_list(np.nditer(vals))),
            optimizer=None,
            name=f'{self.name}_mask{size}_weights',
        )
        self._subsequent_mask_cache[size] = lbann.WeightsLayer(
            dims=str_list([size, size]),
            weights=weights,
            name=f'{self.name}_mask{size}',
        )

    # Return cached mask
    return self._subsequent_mask_cache[size]
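# What the cached mask looks like for size=4: zeros on and below the diagonal,
# -1e9 strictly above it, so adding it to attention scores before the softmax
# assigns ~0 probability to positions j > i.
import numpy as np

print(np.triu(np.full((4, 4), -1e9), k=1))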
def forward(self, x, dims):
    """Apply fftshift.

    Args:
        x (lbann.Layer): Input tensor
        dims (tuple of int): Dimensions of x (dim 0 corresponds to
            channel)

    Returns:
        Layer: Output tensor

    """

    # Get gather indices by applying fftshift to tensor filled with indices
    # Note: Independent fftshift for each channel (dim 0)
    spatial_size = np.prod(dims[1:])
    spatial_inds = np.arange(spatial_size).reshape(dims[1:])
    spatial_inds = np.fft.fftshift(spatial_inds)
    channel_offsets = np.arange(0, dims[0] * spatial_size, spatial_size)
    channel_offsets = channel_offsets.reshape([-1] + [1] * spatial_inds.ndim)
    inds = np.expand_dims(spatial_inds, 0) + channel_offsets

    # Construct LBANN layer graph
    size = np.prod(dims)
    x = lbann.Reshape(x, dims=str_list([size]))
    inds = lbann.WeightsLayer(
        weights=lbann.Weights(
            lbann.ValueInitializer(values=str_list(inds.flatten())),
            optimizer=lbann.NoOptimizer(),
        ),
        dims=str_list([size]),
    )
    y = lbann.Gather(x, inds)
    return lbann.Reshape(y, dims=str_list(dims))
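# Numpy check of the gather-based fftshift used above for a 2-channel 4x4
# tensor: shifting an index grid and gathering with it matches numpy's
# per-channel fftshift.
import numpy as np

dims = (2, 4, 4)
x = np.arange(np.prod(dims), dtype=float).reshape(dims)
spatial_inds = np.fft.fftshift(np.arange(16).reshape(4, 4))
channel_offsets = np.arange(0, 32, 16).reshape(-1, 1, 1)
inds = (np.expand_dims(spatial_inds, 0) + channel_offsets).flatten()
y = x.flatten()[inds].reshape(dims)
np.testing.assert_array_equal(y, np.fft.fftshift(x, axes=(1, 2)))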
def __init__(self, *args, **kwargs):
    super().__init__()
    self.name = kwargs["name"]
    self.activation = None if "activation" not in kwargs.keys() \
        else kwargs["activation"]
    kwargs["activation"] = None

    self.conv = lm.Convolution3dModule(*args, **kwargs)
    bn_scale = lbann.Weights(
        initializer=lbann.ConstantInitializer(value=1.0),
        name="{}_bn_scale".format(self.name))
    bn_bias = lbann.Weights(
        initializer=lbann.ConstantInitializer(value=0.0),
        name="{}_bn_bias".format(self.name))
    self.bn_weights = [bn_scale, bn_bias]
    self.instance = 0
def forward_discriminator2(self, img):
    '''
    Discriminator 2. Weights are frozen as part of Adversarial network = Stacked G + D
    '''
    bn_wts = [lbann.Weights(initializer=lbann.ConstantInitializer(value=1.0)),
              lbann.Weights(initializer=lbann.ConstantInitializer(value=0.0))]

    for count, lyr in enumerate(self.d2_conv):
        if count == 0:
            x = lbann.LeakyRelu(lyr(img), negative_slope=0.2)
        else:
            x = lbann.LeakyRelu(lyr(x), negative_slope=0.2)
        #### without convbnrelu
        # if count==0: x = lbann.LeakyRelu(lbann.BatchNormalization(lyr(img),weights=bn_wts,statistics_group_size=-1),negative_slope=0.2)
        # else: x = lbann.LeakyRelu(lbann.BatchNormalization(lyr(x),weights=bn_wts,statistics_group_size=-1),negative_slope=0.2)

    dims = 524288
    y = self.d2_fc(lbann.Reshape(x, dims=str(dims)))

    return y
def __init__(self, out_channels, kernel_size, stride, padding,
             bn_zero_init, bn_statistics_group_size,
             relu, name):
    """Initialize ConvBNRelu module.

    Args:
        out_channels (int): Number of output channels, i.e. number
            of convolution filters.
        kernel_size (int): Size of convolution kernel.
        stride (int): Convolution stride.
        padding (int): Convolution padding.
        bn_zero_init (bool): Zero-initialize batch normalization
            scale.
        bn_statistics_group_size (int): Group size for aggregating
            batch normalization statistics.
        relu (bool): Apply ReLU activation.
        name (str): Module name.

    """
    super().__init__()
    self.name = name
    self.instance = 0

    # Initialize convolution
    self.conv = lbann.modules.Convolution2dModule(out_channels,
                                                  kernel_size,
                                                  stride=stride,
                                                  padding=padding,
                                                  bias=False,
                                                  name=self.name + '_conv')

    # Initialize batch normalization
    bn_scale_init = 0.0 if bn_zero_init else 1.0
    bn_scale = lbann.Weights(
        initializer=lbann.ConstantInitializer(value=bn_scale_init),
        name=self.name + '_bn_scale')
    bn_bias = lbann.Weights(
        initializer=lbann.ConstantInitializer(value=0.0),
        name=self.name + '_bn_bias')
    self.bn_weights = [bn_scale, bn_bias]
    self.bn_statistics_group_size = bn_statistics_group_size

    # Initialize ReLU
    self.relu = relu
def __init__(self, name=None):
    self.instance = 0
    self.name = (name if name else 'ExaGAN{0}'.format(CosmoGAN.global_count))

    convbnrelu = lbann.models.resnet.ConvBNRelu
    fc = lbann.modules.FullyConnectedModule
    conv = lbann.modules.Convolution2dModule
    #bn_stats_grp_sz = 0 #0 global, 1 local
    bn_stats_grp_sz = -1 #0 global, 1 local

    ##MCR properties
    #@todo: make multichannel optional
    self.datascale = 4
    self.linear_scaler = 1000.

    self.inits = {'dense': lbann.NormalInitializer(mean=0, standard_deviation=0.02),
                  'conv': lbann.NormalInitializer(mean=0, standard_deviation=0.02), #should be truncated Normal
                  'convT': lbann.NormalInitializer(mean=0, standard_deviation=0.02)}

    d_neurons = [64, 128, 256, 512]
    self.d1_conv = [convbnrelu(d_neurons[i], 4, 2, 1, False, bn_stats_grp_sz, False,
                               name=self.name+'_disc1_conv'+str(i))
                    for i in range(len(d_neurons))]
    self.d1_fc = fc(1, name=self.name+'_disc1_fc',
                    weights=[lbann.Weights(initializer=self.inits['dense'])])

    #stacked_discriminator, this will be frozen, no optimizer,
    #layer has to be named for callback
    self.d2_conv = [convbnrelu(d_neurons[i], 4, 2, 1, False, bn_stats_grp_sz, False,
                               name=self.name+'_disc2_conv'+str(i))
                    for i in range(len(d_neurons))]
    self.d2_fc = fc(1, name=self.name+'_disc2_fc',
                    weights=[lbann.Weights(initializer=self.inits['dense'])])

    #generator
    g_neurons = [256, 128, 64]
    self.g_convT = [conv(g_neurons[i], 5, stride=2, padding=2, transpose=True,
                         weights=[lbann.Weights(initializer=self.inits['convT'])])
                    for i in range(len(g_neurons))]
    self.g_fc1 = fc(32768, name=self.name+'_gen_fc1',
                    weights=[lbann.Weights(initializer=self.inits['dense'])])
    self.g_convT3 = conv(1, 5, stride=2, padding=2,
                         activation=lbann.Tanh, name='gen_img', transpose=True,
                         weights=[lbann.Weights(initializer=self.inits['convT'])])
def forward_discriminator1(self, img):
    '''
    Discriminator 1
    '''
    bn_wts = [
        lbann.Weights(initializer=lbann.ConstantInitializer(value=1.0)),
        lbann.Weights(initializer=lbann.ConstantInitializer(value=0.0))
    ]

    for count, lyr in enumerate(self.d1_conv):
        if count == 0:
            x = lbann.LeakyRelu(lyr(img), negative_slope=0.2)
        else:
            x = lbann.LeakyRelu(lyr(x), negative_slope=0.2)
        #### without convbnrelu
        # if count==0: x = lbann.LeakyRelu(lbann.BatchNormalization(lyr(img),weights=bn_wts,statistics_group_size=-1),negative_slope=0.2)
        # else: x = lbann.LeakyRelu(lbann.BatchNormalization(lyr(x),weights=bn_wts,statistics_group_size=-1),negative_slope=0.2)

    dims = 32768
    #dims=25088 ## for padding=1
    y = self.d1_fc(lbann.Reshape(x, dims=str(dims)))

    return y
def __init__(
    self,
    embed_dim=512,
    num_heads=8,
    feedforward_dim=2048,
    dropout=0.1,
    name=None,
):
    TransformerDecoderLayer.global_count += 1
    self.instance = 0
    self.embed_dim = embed_dim
    self.feedforward_dim = feedforward_dim
    self.dropout_prob = dropout

    # Module name
    self.name = name
    if not self.name:
        self.name = f'transformerdecoderlayer{TransformerDecoderLayer.global_count}'

    # Layer modules
    self.attention1 = lbann.modules.transformer.MultiheadAttention(
        embed_dim, num_heads, name=f'{self.name}_attention1')
    self.attention2 = lbann.modules.transformer.MultiheadAttention(
        embed_dim, num_heads, name=f'{self.name}_attention2')

    # Weights for fully-connected layers
    self.fc1_weights = [
        lbann.Weights(initializer=lbann.HeNormalInitializer(),
                      name=f'{self.name}_fc1_matrix'),
        lbann.Weights(initializer=lbann.ConstantInitializer(value=0),
                      name=f'{self.name}_fc1_bias'),
    ]
    self.fc2_weights = [
        lbann.Weights(initializer=lbann.GlorotNormalInitializer(),
                      name=f'{self.name}_fc2_matrix'),
        lbann.Weights(initializer=lbann.ConstantInitializer(value=0),
                      name=f'{self.name}_fc2_bias'),
    ]
def __init__(self, input_channels, output_channels, name=None):
    super().__init__()
    self.name = (name if name
                 else 'DenseGraph_{}'.format(DenseGraphConv.global_count))
    DenseGraphConv.global_count += 1

    bounds = math.sqrt(6.0 / (input_channels + output_channels))
    self.weights_1 = lbann.Weights(
        initializer=lbann.UniformInitializer(min=-bounds, max=bounds),
        name=self.name + '_Weights_1')
    self.weights_2 = lbann.Weights(
        initializer=lbann.UniformInitializer(min=-bounds, max=bounds),
        name=self.name + '_Weights_2')
    self.W1 = lbann.WeightsLayer(
        dims=str_list([input_channels, output_channels]),
        name=self.name + '_param_1',
        weights=self.weights_1)
    self.W2 = lbann.WeightsLayer(
        dims=str_list([input_channels, output_channels]),
        name=self.name + '_param_2',
        weights=self.weights_2)
def __init__(self, neuron_dims=[1000, 1000, 1000], activation=lbann.Relu,
             keep_prob=0.95, name=None):
    self.instance = 0
    self.name = (name if name
                 else 'combo{0}'.format(Combo.global_count))

    #shared weights for drug 1 and 2 tracks
    shared_w = []
    for i in range(len(neuron_dims)):
        shared_w.append(
            lbann.Weights(initializer=lbann.HeNormalInitializer(),
                          name='drug_matrix' + str(i)))
        shared_w.append(
            lbann.Weights(initializer=lbann.ConstantInitializer(value=0.0),
                          name='drug_bias' + str(i)))
    print("SHARED W ", type(shared_w))

    self.geneT = TrackModule(neuron_dims, activation, keep_prob,
                             name=self.name + 'gene_track')
    self.drug1T = TrackModule(neuron_dims, activation, keep_prob, shared_w,
                              name=self.name + 'drug1_track')
    self.drug2T = TrackModule(neuron_dims, activation, keep_prob, shared_w,
                              name=self.name + 'drug2_track')
    self.concatT = TrackModule(neuron_dims, activation, keep_prob,
                               name=self.name + 'concat_track')
def AtomEncoder(node_feature_columns, EMBEDDING_DIM):
    """Embeds the node features into a vector.

    Args:
        node_feature_columns (list(Layer)): A list of layers with node
            features of shape (NUM_NODES)
        EMBEDDING_DIM (int): The embedding dimensionality of the node
            feature vector

    Returns:
        (Layer): A layer containing the embedded node feature matrix of
            shape (NUM_NODES, EMBEDDING_DIM)
    """
    # Courtesy of OGB
    atom_feature_dims = [119, 4, 12, 12, 10, 6, 6, 2, 2]

    _fan_in = atom_feature_dims[0]
    _fan_out = EMBEDDING_DIM
    _embedding_weights = lbann.Weights(
        initializer=_xavier_uniform_init(_fan_in, _fan_out),
        name="atom_encoder_weights_{}".format(0))

    temp = lbann.Embedding(node_feature_columns[0],
                           num_embeddings=atom_feature_dims[0],
                           embedding_dim=EMBEDDING_DIM,
                           weights=_embedding_weights,
                           name="Atom_Embedding_0")
    for i in range(1, 9):
        _fan_in = atom_feature_dims[i]
        _fan_out = EMBEDDING_DIM
        _embedding_weights = lbann.Weights(
            initializer=_xavier_uniform_init(_fan_in, _fan_out),
            name="atom_encoder_weights_{}".format(i))
        _temp2 = lbann.Embedding(node_feature_columns[i],
                                 num_embeddings=atom_feature_dims[i],
                                 embedding_dim=EMBEDDING_DIM,
                                 weights=_embedding_weights,
                                 name="Atom_Embedding_{}".format(i))
        temp = lbann.Sum(temp, _temp2)
    return temp
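# Illustrative numpy sketch of what AtomEncoder computes: one embedding table
# per categorical node feature, with the per-feature embeddings summed into a
# single NUM_NODES x EMBEDDING_DIM matrix (random tables stand in for the
# lbann.Embedding weights).
import numpy as np

atom_feature_dims = [119, 4, 12, 12, 10, 6, 6, 2, 2]
EMBEDDING_DIM, num_nodes = 8, 5
rng = np.random.default_rng(0)
tables = [rng.normal(size=(d, EMBEDDING_DIM)) for d in atom_feature_dims]
features = [rng.integers(0, d, size=num_nodes) for d in atom_feature_dims]
embedded = sum(table[col] for table, col in zip(tables, features))
print(embedded.shape)  # (5, 8)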
def __init__(self, num_channels, size, bias=True, weights=[], name=None):
    """Initialize GRU cell.

    Args:
        num_channels (int): The number of rows in the matrix to
            perform GRU
        size (int): Size of output tensor.
        bias (bool): Whether to apply biases after linearity.
        weights (`Weights` or iterator of `Weights`): Weights in
            fully-connected layer. There are at most four - two
            matrices ((3*size) x (input_size) and (3*size) x (size)
            dimensions) each and two biases (3*size entries) each.
            If weights are not provided, the matrix and bias will be
            initialized in a similar manner as PyTorch (uniform
            random values from [-1/sqrt(size), 1/sqrt(size)]).
        name (str): Default name is in the form 'gru<index>'.

    """
    super().__init__()
    ChannelwiseGRU.global_count += 1
    self.step = 0
    self.size = size
    self.num_channels = num_channels
    self.name = (name if name else f'gru{ChannelwiseGRU.global_count}')
    self.data_layout = 'data_parallel'
    scale = 1 / math.sqrt(self.size)

    self.weights = list(make_iterable(weights))
    weight_name = ['_ih_matrix', '_ih_bias', '_hh_matrix', '_hh_bias']
    for i in range(4):
        if len(self.weights) == i:
            self.weights.append(
                lbann.Weights(initializer=lbann.UniformInitializer(
                    min=-scale, max=scale),
                    name=self.name + weight_name[i]))

    self.ih_fc = ChannelwiseFullyConnectedModule(
        3 * size,
        bias=bias,
        weights=self.weights[:2],
        name=self.name + '_ih_fc')
    self.hh_fc = ChannelwiseFullyConnectedModule(
        3 * size,
        bias=bias,
        weights=self.weights[2:],
        name=self.name + '_hh_fc')
    self.ones = lbann.Constant(
        value=1.0,
        num_neurons=str_list([num_channels, size]),
        name=self.name + '_ones')