def forward(self, img, z, mcr):
    '''Run one full GAN pass and collect every discriminator output.

    Steps:
      - Modify the real image if using mcr
      - D1 + img            -> d1_real
      - G + noise           -> gen_img
      - D1 + gen_img        -> d1_fake (through a StopGradient)
      - Adv (D2) + gen_img  -> d_adv

    Args:
        img: layer holding the real image sample.
        z: noise layer handed to the generator.
        mcr: truthy to enable multi-channel rescaling of the real image.

    Returns:
        Tuple ``(d1_real, d1_fake, d_adv, gen_img, img)``; ``img`` is the
        reshaped real image actually fed to the first discriminator.
    '''
    print('MCR in forward', mcr)
    if not mcr:
        img = lbann.Reshape(img, dims='1 128 128')
    else:
        # Multi-channel rescaling: add an extra channel for real images.
        # Generated images are rescaled inside the generator.
        linear_scale = 1 / self.linear_scaler
        restored = self.inv_transform(lbann.Identity(img))
        rescaled = lbann.WeightedSum(restored,
                                     scaling_factors=str(linear_scale))
        ch2 = lbann.Tanh(rescaled)
        stacked = lbann.Concatenation(lbann.Identity(img), ch2, axis=0)
        img = lbann.Reshape(stacked, dims='2 128 128')

    # instance 1: D1 on the real image
    d1_real = self.forward_discriminator1(img)
    gen_img = self.forward_generator(z, mcr=mcr)
    # instance 2: D1 on the generated image, gradient blocked
    detached = lbann.StopGradient(gen_img)
    d1_fake = self.forward_discriminator1(detached)
    # instance 3: needs to be frozen. Original author's note: the D1
    # instances share weights; D1 weights are copied to d_adv through the
    # replace-weights callback and frozen.
    d_adv = self.forward_discriminator2(gen_img)
    return d1_real, d1_fake, d_adv, gen_img, img
def forward(self, x):
    """Do the VAE forward step

    :param x: input layer; its first ``self.input_feature_dims`` entries
        are embedded (presumably integer token ids - confirm with caller)
    :return: kl term component of loss (layer placed on CPU)
    :return: recon component of loss (layer placed on CPU)
    """
    bounds = str_list([0, self.input_feature_dims])
    tokens = lbann.Identity(lbann.Slice(x, slice_points=bounds))
    x_emb = lbann.Embedding(
        tokens,
        num_embeddings=self.dictionary_size,
        embedding_dim=self.embedding_size,
        name='emb',
        weights=self.emb_weights,
    )

    # Encoder: x -> z, kl_loss
    z, kl = self.forward_encoder(x_emb)

    # Decoder: x, z -> recon_loss
    prediction = self.forward_decoder(x_emb, z)
    recon = self.compute_loss(tokens, prediction)

    # Hack to remove blocking GPU allreduce in evaluation layer
    return (
        lbann.Identity(kl, device='CPU'),
        lbann.Identity(recon, device='CPU'),
    )
def make_model(
        motif_size,
        walk_length,
        num_vertices,
        embed_dim,
        learn_rate,
        num_epochs,
        embeddings_dir,
):
    """Build the CommunityGAN LBANN model.

    The 'samples' input is sliced into ``motif_size`` motif indices followed
    by ``walk_length`` walk indices, both are handed to the GAN module, and
    every layer and weight reachable from the slice is switched to double
    precision.

    Returns:
        lbann.Model trained for ``num_epochs`` epochs; weights are dumped to
        ``embeddings_dir`` every ``num_epochs`` epochs.
    """

    # Layer graph
    boundaries = [0, motif_size, motif_size + walk_length]
    input_ = lbann.Slice(
        lbann.Input(data_field='samples'),
        slice_points=str_list(boundaries),
    )
    motif_indices = lbann.Identity(input_)
    walk_indices = lbann.Identity(input_)
    gan = model.gan.CommunityGAN(num_vertices, motif_size, embed_dim,
                                 learn_rate)
    loss, real_disc_prob, fake_disc_prob, gen_prob = gan(
        motif_indices, motif_size, walk_indices, walk_length)

    # Metrics
    tracked = (
        (real_disc_prob, 'D(real)'),
        (fake_disc_prob, 'D(fake)'),
        (gen_prob, 'G'),
    )
    metrics = [lbann.Metric(layer, name=label) for layer, label in tracked]

    # Callbacks
    callbacks = [
        lbann.CallbackPrint(),
        lbann.CallbackTimer(),
        lbann.CallbackDumpWeights(directory=embeddings_dir,
                                  epoch_interval=num_epochs),
    ]

    # Perform computation at double precision
    for layer in lbann.traverse_layer_graph(input_):
        layer.datatype = lbann.DataType.DOUBLE
        for weight in layer.weights:
            weight.datatype = lbann.DataType.DOUBLE

    # Construct model
    return lbann.Model(
        num_epochs,
        layers=lbann.traverse_layer_graph(input_),
        objective_function=loss,
        metrics=metrics,
        callbacks=callbacks,
    )
def forward(self, x):
    """Perform LSTM step.

    State from previous steps (``self.last_output`` and ``self.last_cell``)
    is used to compute the output, and both are overwritten with this
    step's results. ``self.step`` is incremented and used to build unique
    layer names.
    """
    self.step += 1
    prefix = '{0}_step{1}'.format(self.name, self.step)
    layout = self.data_layout
    size = self.size

    # Apply linearity
    joined = lbann.Concatenation([x, self.last_output],
                                 name=prefix + '_input',
                                 data_layout=layout)
    projected = self.fc(joined)

    # Get gates and cell update: the first `size` entries feed the cell
    # update, the remaining 3*size entries feed the three sigmoid gates.
    fc_parts = lbann.Slice(projected,
                           slice_points=_str_list([0, size, 4 * size]),
                           name=prefix + '_fc_slice',
                           data_layout=layout)
    cell_update = lbann.Tanh(fc_parts,
                             name=prefix + '_cell_update',
                             data_layout=layout)
    sigmoid = lbann.Sigmoid(fc_parts,
                            name=prefix + '_sigmoid',
                            data_layout=layout)
    gate_parts = lbann.Slice(
        sigmoid,
        slice_points=_str_list([0, size, 2 * size, 3 * size]),
        name=prefix + '_sigmoid_slice',
        data_layout=layout)
    forget_gate, input_gate, output_gate = [
        lbann.Identity(gate_parts, name=prefix + suffix, data_layout=layout)
        for suffix in ('_forget_gate', '_input_gate', '_output_gate')
    ]

    # Cell state
    kept = lbann.Multiply([forget_gate, self.last_cell],
                          name=prefix + '_cell_forget',
                          data_layout=layout)
    added = lbann.Multiply([input_gate, cell_update],
                           name=prefix + '_cell_input',
                           data_layout=layout)
    cell = lbann.Add([kept, added],
                     name=prefix + '_cell',
                     data_layout=layout)

    # Output
    cell_act = lbann.Tanh(cell,
                          name=prefix + '_cell_activation',
                          data_layout=layout)
    output = lbann.Multiply([output_gate, cell_act],
                            name=prefix,
                            data_layout=layout)

    # Update state and return output
    self.last_cell = cell
    self.last_output = output
    return output
def forward_discriminator2(self, y):
    """Apply the second discriminator to ``y``.

    A second channel is produced by ``self.inv_transform`` and concatenated
    with ``y``; the result is reshaped to ``2 128 128``, passed through the
    four ``self.d2_conv`` modules (each followed by a LeakyRelu with slope
    0.2), flattened to 32768 values and fed to ``self.d2_fc``.
    """
    ch2 = self.inv_transform(lbann.Identity(y))
    stacked = lbann.Concatenation(lbann.Identity(y), ch2, axis=0)
    x = lbann.Reshape(stacked, dims='2 128 128')
    for idx in range(4):
        x = lbann.LeakyRelu(self.d2_conv[idx](x), negative_slope=0.2)
    flat = lbann.Reshape(x, dims='32768')
    return self.d2_fc(flat)
def f_invtransform(y, scale=4.0):
    '''Transform ``y`` back to the original data space.

    The inverse of the transformation function that scales the data before
    training. Builds the layer expression ``scale * (1 + y) / (1 - y)``,
    using SafeDivide for the quotient.
    '''
    numerator = lbann.Add(lbann.Constant(value=1.0, hint_layer=y),
                          lbann.Identity(y))
    denominator = lbann.Subtract(lbann.Constant(value=1.0, hint_layer=y),
                                 lbann.Identity(y))
    ratio = lbann.SafeDivide(numerator, denominator)
    return lbann.WeightedSum(ratio, scaling_factors=str(scale))
def inv_transform(self, y):
    '''Original transformation, inverted.

    The inverse of the transformation function that scales the data before
    training: ``self.datascale * (1 + y) / (1 - y)``, with the quotient
    computed by SafeDivide.
    '''
    one_plus_y = lbann.Add(lbann.Constant(value=1.0, hint_layer=y),
                           lbann.Identity(y))
    one_minus_y = lbann.Subtract(lbann.Constant(value=1.0, hint_layer=y),
                                 lbann.Identity(y))
    quotient = lbann.SafeDivide(one_plus_y, one_minus_y)
    return lbann.WeightedSum(quotient, scaling_factors=str(self.datascale))
def clone(self):
    """Generates a clone of the GraphVertexData object.

    Results in a splitting in the DAG: every vertex layer is routed through
    an ``lbann.Split``. One Identity child of the split replaces the entry
    in ``self.layers`` (so this object keeps working on its own branch) and
    a second Identity child goes to the clone.

    Returns:
        GraphVertexData: new object holding the cloned layers and the same
        ``num_features``.
    """
    cloned_layers = []
    for i, node in enumerate(self.layers):
        branch_point = lbann.Split(node)
        # Fix: the original assigned to a bare, undefined name `layers`,
        # which raised NameError on the first iteration.
        self.layers[i] = lbann.Identity(branch_point)
        cloned_layers.append(lbann.Identity(branch_point))
    return GraphVertexData(cloned_layers, self.num_features)
def construct_model():
    """Model description

    Two 3x3 convolution modules followed by three fully-connected modules,
    trained with cross-entropy against a one-hot label. The input is sliced
    into ``dims - 1`` data values plus one label value; ``dims``,
    ``num_classes`` and ``str_list`` come from the enclosing module.

    Returns:
        lbann.Model configured for 10 epochs with an accuracy metric.
    """
    import lbann
    import lbann.modules

    fc = lbann.modules.FullyConnectedModule
    conv = lbann.modules.Convolution2dModule

    conv1 = conv(20, 3, stride=1, padding=1, name='conv1')
    conv2 = conv(20, 3, stride=1, padding=1, name='conv2')
    fc1 = fc(100, name='fc1')
    fc2 = fc(20, name='fc2')
    fc3 = fc(num_classes, name='fc3')

    # Layer graph
    source = lbann.Input(name='inp_tensor', target_mode='classification')
    inp_slice = lbann.Slice(source,
                            axis=0,
                            slice_points=str_list([0, dims - 1, dims]),
                            name='inp_slice')
    xdata = lbann.Identity(inp_slice)
    ylabel = lbann.Identity(inp_slice, name='gt_y')

    # Original author's note: NHWC to NCHW
    x = lbann.Reshape(xdata, dims='14 13 13')
    x = conv1(x)
    x = conv2(x)
    x = lbann.Reshape(x, dims='3380')
    x = fc1(x)
    x = lbann.Relu(x)
    x = lbann.Dropout(x, keep_prob=0.5)
    x = fc2(x)
    x = lbann.Dropout(x, keep_prob=0.5)
    pred = lbann.Softmax(fc3(x))

    gt_label = lbann.OneHot(ylabel, size=num_classes)
    loss = lbann.CrossEntropy([pred, gt_label], name='loss')
    acc = lbann.CategoricalAccuracy([pred, gt_label])

    layers = list(lbann.traverse_layer_graph(source))

    # Setup objective function
    weights = {w for layer in layers for w in layer.weights}
    obj = lbann.ObjectiveFunction(loss)

    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]

    # Construct model
    num_epochs = 10
    accuracy = lbann.Metric(acc, name='accuracy', unit='%')
    return lbann.Model(num_epochs,
                       weights=weights,
                       layers=layers,
                       metrics=[accuracy],
                       objective_function=obj,
                       callbacks=callbacks)
def forward(self, x, z):
    """Do the WAE forward step

    :param x: input layer; its first ``self.input_feature_dims`` entries
        are embedded
    :param z: prior latent sample layer, reshaped to ``(1, self.zdim)``
    :return: tuple ``(recon_loss, d_real, d_fake, d_adv, arg_max)`` -
        reconstruction loss (on CPU), discriminator0 output for the prior
        sample, discriminator0 output for the encoded sample behind a
        StopGradient, discriminator1 output for the encoded sample, and the
        second value returned by ``self.forward_decoder``
    """
    seq_len = self.input_feature_dims
    zdim = self.zdim

    def tile_over_sequence(latent):
        # Reshape to one row, then repeat it for every sequence position.
        row = lbann.Reshape(latent, dims=str_list([1, zdim]))
        return lbann.Tessellate(row, dims=str_list([seq_len, zdim]))

    tokens = lbann.Slice(x, slice_points=str_list([0, seq_len]))
    tokens = lbann.Identity(tokens)
    x_emb = lbann.Embedding(tokens,
                            num_embeddings=self.dictionary_size,
                            embedding_dim=self.embedding_size,
                            name='emb',
                            weights=self.emb_weights)

    # Encoder: x -> z, perturbed with Gaussian noise
    z_sample = self.forward_encoder(x_emb)
    eps = lbann.Gaussian(mean=self.gmean,
                         stdev=self.gstd,
                         hint_layer=z_sample)
    z_sample = lbann.Add([z_sample, eps])

    # Decoder: x, z -> recon_loss
    pred, arg_max = self.forward_decoder(x_emb, z_sample)
    recon_loss = self.compute_loss(tokens, pred)

    # Hack to remove blocking GPU allreduce in evaluation layer
    recon_loss = lbann.Identity(recon_loss, device='CPU')

    # Discriminator on (embedding, prior sample)
    z_prior = tile_over_sequence(z)
    d_real = self.discriminator0(
        lbann.Concatenation([x_emb, z_prior], axis=1))

    # Discriminators on (embedding, encoded sample)
    z_sample0 = tile_over_sequence(z_sample)
    y_z_sample = lbann.Concatenation([x_emb, z_sample0], axis=1)
    d_fake = self.discriminator0(lbann.StopGradient(y_z_sample))
    d_adv = self.discriminator1(y_z_sample)  # freeze

    return recon_loss, d_real, d_fake, d_adv, arg_max
def forward(
        self,
        motif_indices,
        motif_size,
        walk_indices,
        walk_length,
):
    """Build the combined discriminator/generator loss.

    Loss function:
        L_disc = - log(D(real)) - log(1-D(fake))
        L_gen  = - log(G) * stop_gradient(log(1-D(fake)))

    Returns:
        Tuple ``(loss, real_disc_prob, fake_disc_prob, gen_prob)`` where
        ``loss`` is the sum of the discriminator and generator terms.
    """

    # Apply generator
    fake_motif_indices, gen_prob, gen_log_prob = self.generator(
        walk_length, walk_indices, motif_size)

    # Get discriminator embeddings in log-space for real and fake motifs
    # in one lookup, then split them apart again.
    combined_indices = lbann.Concatenation(motif_indices,
                                           fake_motif_indices)
    combined = self.discriminator.get_log_embeddings(combined_indices)
    combined = lbann.Slice(
        combined,
        slice_points=str_list([0, motif_size, 2 * motif_size]),
    )
    real_log_emb = lbann.Identity(combined)
    fake_log_emb = lbann.Identity(combined)

    # Apply discriminator
    real_disc_prob, real_disc_log_not_prob = self.discriminator(
        motif_size, real_log_emb)
    fake_disc_prob, fake_disc_log_not_prob = self.discriminator(
        motif_size, fake_log_emb)

    # Clamp before the log so that log(0) is never evaluated
    clamped = lbann.Clamp(real_disc_prob, min=1e-37, max=1)
    real_disc_log_prob = lbann.Log(clamped)
    disc_loss = lbann.WeightedSum(
        real_disc_log_prob,
        fake_disc_log_not_prob,
        scaling_factors=str_list([-1, -1]),
    )
    frozen_signal = lbann.StopGradient(fake_disc_log_not_prob)
    gen_loss = lbann.Multiply(gen_log_prob, frozen_signal)
    loss = lbann.Add(disc_loss, gen_loss)

    return loss, real_disc_prob, fake_disc_prob, gen_prob
def inv_transform(self, y):
    '''
    The inverse of the transformation function that scales the data before
    training. Produces ``self.datascale * (1 + y) / (1 - y)`` with the
    division done by SafeDivide.
    '''
    top = lbann.Add(lbann.Constant(value=1.0, hint_layer=y),
                    lbann.Identity(y))
    bottom = lbann.Subtract(lbann.Constant(value=1.0, hint_layer=y),
                            lbann.Identity(y))
    unscaled = lbann.SafeDivide(top, bottom)
    inv_transform = lbann.WeightedSum(unscaled,
                                      scaling_factors=str(self.datascale))
    # Disabled follow-up rescaling kept from the original:
    #linear_scale = 1/self.linear_scaler
    #CH2 = lbann.Tanh(lbann.WeightedSum(inv_transform,scaling_factors=str(linear_scale)))
    #return CH2
    return inv_transform
def decoder_cnn(self, z):
    """Decode ``z`` into a flattened image concatenated with scalar outputs.

    ``self.dec_cnn_fc`` output feeds two branches: ``self.dec_fc_sca`` and a
    ``16 8 8`` reshape followed by the three ``self.dec_convT`` modules
    (Relu between them). The image branch is flattened to ``64 * 64 * 4``
    values and concatenated with the scalar branch along axis 0.
    """
    hidden = self.dec_cnn_fc(z)
    sca = self.dec_fc_sca(lbann.Identity(hidden))

    img = lbann.Reshape(lbann.Identity(hidden),
                        dims="16 8 8",
                        name=self.name + 'dec_reshape0')
    img = self.dec_convT[0](img)
    img = lbann.Relu(img)
    img = self.dec_convT[1](img)
    img = lbann.Relu(img)
    img = self.dec_convT[2](img)

    # concat for common interface, slice in output
    # ?? check tensor shape
    img = lbann.Reshape(img,
                        dims=str(64 * 64 * 4),
                        name=self.name + 'dec_reshape1')
    # todo check that concat size == dec_out_dim
    return lbann.Concatenation([img, sca], axis=0)
def forward(self, x):
    """Predict a single response value from the concatenated input ``x``.

    ``x`` is cut at offsets 0, 921, 4750 and 8579 into three consecutive
    pieces that go to ``self.geneT``, ``self.drug1T`` and ``self.drug2T``
    respectively; the three outputs are concatenated, passed through
    ``self.concatT`` and reduced to one neuron by a biased fully-connected
    layer.
    """
    x_slice = lbann.Slice(x,
                          axis=0,
                          slice_points="0 921 4750 8579",
                          name='inp_slice')
    towers = (self.geneT, self.drug1T, self.drug2T)
    # Each Identity picks up the next output of the slice, in order.
    branches = [tower(lbann.Identity(x_slice)) for tower in towers]
    merged = lbann.Concatenation(branches, name=self.name + 'concat')
    concat = self.concatT(merged)
    return lbann.FullyConnected(concat, num_neurons=1, has_bias=True)
def forward_encoder(self, x_emb):
    """Encoder step, emulating z ~ E(x) = q_E(z|x)

    :param x_emb: (n_batch, len(x), d_z) of floats, embeddings for input
        sentence x
    :return: (n_batch, d_z) of floats, sample of latent vector z
    :return: float, kl term component of loss
    """
    # _, h = self.encoder_rnn(x, None)
    h = self.encoder_rnn(x_emb, None)

    # Keep only the final sequence position of the RNN output
    seq_len = self.input_feature_dims
    h = lbann.Slice(h,
                    slice_points=str_list([seq_len - 1, seq_len]),
                    axis=0)
    h = lbann.Identity(h)

    mu = self.q_mu(h)
    logvar = self.q_logvar(h)

    # Set datatype of previous layers
    # Note: Depth-first search from mu and logvar to x_emb
    skipped_types = (lbann.Slice, lbann.Reshape, lbann.Tessellate)
    pending = [mu, logvar]
    seen = {mu, logvar}
    while pending:
        layer = pending.pop()
        if type(layer) not in skipped_types:
            layer.datatype = self.datatype
        for parent in layer.parents:
            if parent in seen or parent is x_emb:
                continue
            pending.append(parent)
            seen.add(parent)

    # eps = torch.randn_like(mu)
    eps = lbann.Gaussian(mean=0, stdev=1, hint_layer=mu)

    # z = mu + (logvar / 2).exp() * eps
    half_logvar = lbann.WeightedSum(logvar, scaling_factors='0.5')
    std = lbann.Exp(half_logvar)
    z = lbann.Add([mu, lbann.Multiply([std, eps])])

    # kl_loss = 0.5 * (logvar.exp() + mu ** 2 - 1 - logvar).sum(1).mean()
    kl_terms = lbann.WeightedSum(
        lbann.Exp(logvar),
        lbann.Square(mu),
        self.constant(1, hint_layer=mu),
        logvar,
        scaling_factors='0.5 0.5 -0.5 -0.5',
    )
    kl_loss = lbann.Reduction(kl_terms, mode='sum')

    return z, kl_loss
def construct_layer_graph(statistics_group_size, version,
                          cumulative_layer_num, input_node):
    """Attach image and label branches to ``input_node`` and build DenseNet.

    Two Identity layers are created from ``input_node``; the layer counter
    is incremented and logged after each. The first branch is passed to
    ``densenet`` together with the updated counter.

    Returns:
        Tuple ``(probabilities, image_labels_node)``.
    """
    message = 'Identity. cumulative_layer_num={n}'

    # Input data
    images_node = lbann.Identity(input_node)
    cumulative_layer_num += 1
    log(message.format(n=cumulative_layer_num))

    # The label branch hangs off input_node, not images_node.
    image_labels_node = lbann.Identity(input_node)
    cumulative_layer_num += 1
    log(message.format(n=cumulative_layer_num))

    # The network consumes images_node, not image_labels_node.
    probabilities = densenet(statistics_group_size, version,
                             cumulative_layer_num, images_node)

    return probabilities, image_labels_node
def inv_transform(self, y):
    '''Original transformation.

    The inverse of the transformation function that scales the data before
    training, i.e. ``self.datascale * (1 + y) / (1 - y)`` built from LBANN
    layers (SafeDivide for the quotient).
    '''
    plus = lbann.Add(lbann.Constant(value=1.0, hint_layer=y),
                     lbann.Identity(y))
    minus = lbann.Subtract(lbann.Constant(value=1.0, hint_layer=y),
                           lbann.Identity(y))
    fraction = lbann.SafeDivide(plus, minus)
    return lbann.WeightedSum(fraction, scaling_factors=str(self.datascale))

# Disabled alternative kept from the original file.
# def inv_transform(self, y):### New tranformation : log-linear
#     threshold = lbann.Constant(value=0.5, hint_layer=y)
#     is_above_threshold = lbann.Greater(y, threshold)
#     is_below_threshold = lbann.LogicalNot(is_above_threshold)
#     below = lbann.SafeDivide(
#         lbann.Subtract(y, lbann.Constant(value=1, hint_layer=y)),
#         lbann.Constant(value=0.03, hint_layer=y),
#     )
#     above = lbann.Exp(lbann.SafeDivide(
#         lbann.Subtract(
#             y,
#             lbann.Constant(value=0.5-0.5/math.log(300)*math.log(50), hint_layer=y)),
#         lbann.Constant(value=0.5/math.log(300), hint_layer=y),
#     ))
#     return lbann.Add(
#         lbann.Multiply(is_above_threshold, above),
#         lbann.Multiply(is_below_threshold, below),
#     )
# def f_invtransform_new(y):
#     if y<=0.5:
#         a=0.03;b=-1.0
#         return (y-b)/a
#     elif y>0.5:
#         a=0.5/np.log(300)
#         b=0.5-a*np.log(50)
#         return np.exp((y-b)/a)
def Graph_Data_Parser(_lbann_input_,
                      num_nodes,
                      node_feature_size,
                      max_edges,
                      num_classes=1):
    """ A parser for graph structured data with node features,
        source and target node indices (COO) format, and a target

    The flat input is cut into four consecutive segments of sizes
    ``num_nodes * node_feature_size``, ``max_edges``, ``max_edges`` and
    ``num_classes``.

    Args:
        _lbann_input_ (Layer): The input layer of the LBANN model
        num_nodes (int): The maximum number of nodes in the dataset
        node_feature_size (int): The dimensionality of the node features
                                 matrix
        max_edges (int): The maximum number of edges in the dataset
        num_classes (int): The number of classes in the target or 1 for
                           regression (default : 1)
    Returns:
        (dictionary) Returns a dictionary with the keys: node_features,
                     source_indices, target_indices, and target
    """
    segment_sizes = [
        0, num_nodes * node_feature_size, max_edges, max_edges, num_classes
    ]
    # Running totals turn the segment sizes into absolute slice offsets.
    boundaries = list(accumulate(segment_sizes))
    sliced_input = lbann.Slice(_lbann_input_,
                               slice_points=str_list(boundaries),
                               name="Sliced_Graph_Input")

    # Each consumer below takes the next slice output, in creation order.
    node_features = lbann.Reshape(
        lbann.Identity(sliced_input),
        dims=str_list([num_nodes, node_feature_size]),
        name="Node_Feature_Matrix")
    source_indices = lbann.Identity(sliced_input)
    target_indices = lbann.Identity(sliced_input)
    targets = lbann.Identity(sliced_input)

    return {
        "node_features": node_features,
        "source_indices": source_indices,
        "target_indices": target_indices,
        "target": targets
    }
def Gelu_approx(x):
    """Build the tanh approximation of GELU as an LBANN layer expression.

    This approximates gelu and may be more performant:
        0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x ** 3)))
    Based on: https://paperswithcode.com/method/gelu
    """
    sqrt_2_over_pi = math.sqrt(2 / math.pi)
    b_coef = 0.044715

    x_squared = lbann.Multiply(lbann.Identity(x), x)
    x_cubed = lbann.Multiply(x_squared, x)
    cubic_term = lbann.Scale(x_cubed, constant=b_coef)
    tanh_arg = lbann.Scale(lbann.Add(x, cubic_term),
                           constant=sqrt_2_over_pi)
    tanh_x = lbann.Tanh(tanh_arg)
    one_plus_tanh = lbann.AddConstant(tanh_x, constant=1)
    return lbann.Scale(lbann.Multiply(x, one_plus_tanh), constant=0.5)
def matrix_to_graph(cls, mat_layer, num_vertices, num_features):
    """Given a 2D matrix of shape (num_vertices, num_features), returns a
       GraphVertexData object with num_vertices number of nodes with
       num_features.

    The matrix is flattened, sliced every ``num_features`` entries, and
    each slice is reshaped to ``(1, num_features)``.
    """
    total = num_vertices * num_features
    slice_points = str_list(list(range(0, total + 1, num_features)))
    flattened_layer = lbann.Reshape(mat_layer, dims=str(total))
    sliced_mat_layer = lbann.Slice(flattened_layer,
                                   axis=0,
                                   slice_points=slice_points)

    # One Identity per vertex: each picks up the next slice output.
    vertex_layers = [
        lbann.Reshape(lbann.Identity(sliced_mat_layer),
                      dims=str_list([1, num_features]))
        for _ in range(num_vertices)
    ]
    return cls(vertex_layers, num_features)
def forward_encoder(self, x_emb):
    """Encoder step, emulating z ~ E(x) = q_E(z|x)

    :param x_emb: (n_batch, len(x), d_z) of floats, embeddings for input
        sentence x
    :return: latent vector z, the output of ``self.q_mu`` applied to the
        last sequence position of the encoder RNN output (no KL term is
        computed here)
    """
    # _, h = self.encoder_rnn(x, None)
    rnn_out = self.encoder_rnn(x_emb, None)

    seq_len = self.input_feature_dims
    last_step = lbann.Slice(
        rnn_out,
        slice_points=str_list([seq_len - 1, seq_len]),
        axis=0,
    )
    last_step = lbann.Identity(last_step)
    return self.q_mu(last_step)
def construct_model(run_args):
    """Construct LBANN model.

    Initial model for ATOM molecular VAE

    Builds the WAE graph (reconstruction loss plus three sigmoid binary
    cross-entropy discriminator terms and L2 regularization), freezes the
    weights of every layer whose name contains "disc1", and registers a
    replace-weights callback that copies from the "disc0"/"instance1"
    layers to the "disc1" layers every 2 batches.

    Args:
        run_args: parsed arguments; reads dump_model_dir, pad_index,
            sequence_length, embedding_dim, num_embeddings, z_dim,
            dump_outputs_dir, dump_outputs_interval and num_epochs.

    Returns:
        lbann.Model

    Raises:
        AssertionError: if dump_model_dir is falsy or pad_index,
            sequence_length, embedding_dim or num_embeddings is None.
    """
    import lbann

    print("Dump model dir ", run_args.dump_model_dir)
    assert run_args.dump_model_dir, "evaluate script asssumes a pretrained WAE model"
    pad_index = run_args.pad_index
    assert pad_index is not None

    sequence_length = run_args.sequence_length
    assert sequence_length is not None

    print("sequence length is {}".format(sequence_length))

    # NOTE(review): data_layout is assigned but not used in this function.
    data_layout = "data_parallel"

    # Layer graph
    input_ = lbann.Identity(lbann.Input(name='inp', data_field='samples'),
                            name='inp1')
    wae_loss = []
    input_feature_dims = sequence_length

    embedding_size = run_args.embedding_dim
    dictionary_size = run_args.num_embeddings
    assert embedding_size is not None
    assert dictionary_size is not None

    save_output = True if run_args.dump_outputs_dir else False

    print("save output? ", save_output, "out dir ",
          run_args.dump_outputs_dir)
    # Prior sample handed to the model alongside the input
    z = lbann.Gaussian(mean=0.0, stdev=1.0, neuron_dims=run_args.z_dim)

    waemodel = molwae.MolWAE(input_feature_dims, dictionary_size,
                             embedding_size, pad_index, run_args.z_dim,
                             save_output)
    recon, d1_real, d1_fake, d_adv, arg_max = waemodel(input_, z)

    # Targets for the discriminator losses
    zero = lbann.Constant(value=0.0, num_neurons='1', name='zero')
    one = lbann.Constant(value=1.0, num_neurons='1', name='one')

    d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real, one],
                                                  name='d1_real_bce')
    d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake, zero],
                                                  name='d1_fake_bce')
    d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv, one],
                                                name='d_adv_bce')

    wae_loss.append(recon)

    # Snapshot of the graph at this point; layers created further down
    # (pred_tensor) are not part of this list.
    layers = list(lbann.traverse_layer_graph(input_))
    # Setup objective function
    weights = set()
    src_layers = []
    dst_layers = []
    for l in layers:
        # Sources for weight replacement: weighted layers named with both
        # "disc0" and "instance1"
        if (l.weights and "disc0" in l.name and "instance1" in l.name):
            src_layers.append(l.name)
        #freeze weights in disc2
        if (l.weights and "disc1" in l.name):
            dst_layers.append(l.name)
            for idx in range(len(l.weights)):
                l.weights[idx].optimizer = lbann.NoOptimizer()
        weights.update(l.weights)
    # Regularize only the weights that still have an optimizer
    l2_weights = [
        w for w in weights
        if not isinstance(w.optimizer, lbann.NoOptimizer)
    ]
    l2_reg = lbann.L2WeightRegularization(weights=l2_weights, scale=1e-4)

    wae_loss.append(d1_real_bce)
    wae_loss.append(d_adv_bce)
    wae_loss.append(d1_fake_bce)
    wae_loss.append(l2_reg)
    print("LEN wae loss ", len(wae_loss))

    obj = lbann.ObjectiveFunction(wae_loss)

    # Initialize check metric callback
    metrics = [
        lbann.Metric(d_adv_bce, name='adv_loss'),
        lbann.Metric(recon, name='recon')
    ]

    callbacks = [
        lbann.CallbackPrint(),
        #lbann.CallbackStepLearningRate(step=10, amt=0.5),
        lbann.CallbackTimer()
    ]

    callbacks.append(
        lbann.CallbackReplaceWeights(source_layers=list2str(src_layers),
                                     destination_layers=list2str(dst_layers),
                                     batch_interval=2))

    #Dump output (activation) for post processing
    if (run_args.dump_outputs_dir):
        # The callback refers to this layer by its name, 'pred_tensor'
        pred_tensor = lbann.Concatenation(arg_max, name='pred_tensor')
        callbacks.append(
            lbann.CallbackDumpOutputs(
                batch_interval=run_args.dump_outputs_interval,
                execution_modes='test',
                directory=run_args.dump_outputs_dir,
                layers=f'inp pred_tensor {waemodel.q_mu.name}'))
    # Construct model
    return lbann.Model(run_args.num_epochs,
                       weights=weights,
                       layers=layers,
                       objective_function=obj,
                       metrics=metrics,
                       callbacks=callbacks)
def construct_model(run_args):
    """Construct LBANN model.

    Initial model for ATOM molecular VAE

    Decoder-only evaluation graph: the input sample is sliced into
    ``sequence_length`` sequence entries followed by ``z_dim`` latent
    entries, the sequence part is embedded and decoded against the latent
    part, and the input concatenated with the decoder's second output is
    dumped in 'test' mode.

    Args:
        run_args: parsed arguments; reads pad_index, sequence_length, z_dim,
            embedding_dim, num_embeddings, dump_outputs_dir,
            dump_outputs_interval and num_epochs.

    Returns:
        lbann.Model

    Raises:
        AssertionError: if pad_index, sequence_length, embedding_dim or
            num_embeddings is None.
    """
    import lbann

    pad_index = run_args.pad_index
    assert pad_index is not None

    sequence_length = run_args.sequence_length
    assert sequence_length is not None, 'should be training seq len + bos + eos'

    print("sequence length is {}, which is training sequence len + bos + eos".format(sequence_length))

    # NOTE(review): data_layout is assigned but not used in this function.
    data_layout = "data_parallel"

    # Layer graph
    input_ = lbann.Input(data_field='samples',name='inp_data')
    #Note input assumes to come from encoder script concatenation of input smiles + z
    inp_slice = lbann.Slice(input_, axis=0,
                            slice_points=str_list([0, sequence_length, sequence_length+run_args.z_dim]),
                            name='inp_slice')
    inp_smile = lbann.Identity(inp_slice,name='inp_smile')
    z = lbann.Identity(inp_slice, name='z')
    wae_loss= []
    input_feature_dims = sequence_length

    embedding_size = run_args.embedding_dim
    dictionary_size = run_args.num_embeddings
    assert embedding_size is not None
    assert dictionary_size is not None

    save_output = True if run_args.dump_outputs_dir else False

    print("save output? ", save_output, "out dir ", run_args.dump_outputs_dir)
    #uncomment below for random sampling
    #z = lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims=str(run_args.z_dim))
    x = lbann.Slice(inp_smile, slice_points=str_list([0, input_feature_dims]))
    x = lbann.Identity(x)
    waemodel = molwae.MolWAE(input_feature_dims,
                             dictionary_size,
                             embedding_size,
                             pad_index,run_args.z_dim,save_output=save_output)
    # The embedding reuses the model's own embedding weights
    x_emb = lbann.Embedding(
        x,
        num_embeddings=waemodel.dictionary_size,
        embedding_dim=waemodel.embedding_size,
        name='emb',
        weights=waemodel.emb_weights
    )

    pred, arg_max = waemodel.forward_decoder(x_emb,z)
    recon = waemodel.compute_loss(x, pred)

    wae_loss.append(recon)

    # Snapshot of the graph at this point; pred_tensor and conc_out are
    # created afterwards and are not part of this list.
    layers = list(lbann.traverse_layer_graph(input_))
    # Setup objective function
    weights = set()
    for l in layers:
        weights.update(l.weights)
    #l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4)

    #wae_loss.append(l2_reg)
    print("LEN wae loss ", len(wae_loss))

    obj = lbann.ObjectiveFunction(wae_loss)

    # Initialize check metric callback
    metrics = [lbann.Metric(recon, name='recon')]

    callbacks = [lbann.CallbackPrint(),
                 lbann.CallbackTimer()]

    #Dump output (activation) for post processing
    # Unlike save_output above, this dump callback is added unconditionally.
    pred_tensor = lbann.Concatenation(arg_max, name='pred_tensor')
    conc_out = lbann.Concatenation([input_,pred_tensor], name='conc_out')
    callbacks.append(lbann.CallbackDumpOutputs(batch_interval=run_args.dump_outputs_interval,
                                               execution_modes='test',
                                               directory=run_args.dump_outputs_dir,
                                               layers=f'{conc_out.name}'))
    # Construct model
    return lbann.Model(run_args.num_epochs,
                       weights=weights,
                       layers=layers,
                       objective_function=obj,
                       metrics=metrics,
                       callbacks=callbacks)
def setup(data_reader_file,
          name='classifier',
          num_labels=200,
          mini_batch_size=128,
          num_epochs=1000,
          learning_rate=0.1,
          bn_statistics_group_size=2,
          fc_data_layout='model_parallel',
          warmup_epochs=50,
          learning_rate_drop_interval=50,
          learning_rate_drop_factor=0.25,
          checkpoint_interval=None):
    """Build the classifier experiment: model, data reader and optimizer.

    A ResNet module feeds a softmax fully-connected head; the objective is
    cross-entropy plus L2 regularization over Convolution and
    FullyConnected weights.

    Args:
        data_reader_file: path to a prototext file describing the data
            reader.
        name: prefix for the fully-connected head's name (``{name}_fc``).
        num_labels: output size of the fully-connected head.
        mini_batch_size: used to scale the warm-up target learning rate
            (``learning_rate * mini_batch_size / 128``).
        num_epochs: number of training epochs.
        learning_rate: base learning rate for SGD.
        bn_statistics_group_size: forwarded to ``modules.ResNet``.
        fc_data_layout: data layout of the fully-connected head.
        warmup_epochs: epochs of linear learning-rate growth; falsy
            disables the warm-up callback.
        learning_rate_drop_interval: epoch spacing between learning-rate
            drops.
        learning_rate_drop_factor: multiplier applied at each drop; falsy
            disables the drop callback.
        checkpoint_interval: epochs between checkpoints written to
            ``ckpt``; falsy (the default) disables checkpointing.

    Returns:
        Tuple ``(model, data_reader_proto, opt)``.
    """

    # Setup input data
    input_ = lbann.Input(target_mode = 'classification')
    images = lbann.Identity(input_)
    labels = lbann.Identity(input_)

    # Classification network
    head_cnn = modules.ResNet(bn_statistics_group_size=bn_statistics_group_size)
    class_fc = lbann.modules.FullyConnectedModule(num_labels,
                                                  activation=lbann.Softmax,
                                                  name=f'{name}_fc',
                                                  data_layout=fc_data_layout)
    x = head_cnn(images)
    probs = class_fc(x)

    # Setup objective function
    cross_entropy = lbann.CrossEntropy([probs, labels])
    l2_reg_weights = set()
    for layer in lbann.traverse_layer_graph(input_):
        # Exact type match on purpose: subclasses are not regularized,
        # matching the original behavior.
        if type(layer) in (lbann.Convolution, lbann.FullyConnected):
            l2_reg_weights.update(layer.weights)
    l2_reg = lbann.L2WeightRegularization(weights=l2_reg_weights, scale=0.0002)
    obj = lbann.ObjectiveFunction([cross_entropy, l2_reg])

    # Setup model
    metrics = [lbann.Metric(lbann.CategoricalAccuracy([probs, labels]),
                            name='accuracy', unit='%')]
    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]
    if checkpoint_interval:
        # Fix: honor the requested interval. Previously the argument only
        # switched checkpointing on and the interval was hard-coded to 5.
        callbacks.append(
            lbann.CallbackCheckpoint(
                checkpoint_dir='ckpt',
                checkpoint_epochs=checkpoint_interval
            )
        )

    # Learning rate schedules
    if warmup_epochs:
        callbacks.append(
            lbann.CallbackLinearGrowthLearningRate(
                target=learning_rate * mini_batch_size / 128,
                num_epochs=warmup_epochs
            )
        )
    if learning_rate_drop_factor:
        callbacks.append(
            lbann.CallbackDropFixedLearningRate(
                drop_epoch=list(range(0, num_epochs, learning_rate_drop_interval)),
                amt=learning_rate_drop_factor)
        )

    # Construct model
    model = lbann.Model(num_epochs,
                        layers=lbann.traverse_layer_graph(input_),
                        objective_function=obj,
                        metrics=metrics,
                        callbacks=callbacks)

    # Setup optimizer
    # opt = lbann.Adam(learn_rate=learning_rate, beta1=0.9, beta2=0.999, eps=1e-8)
    opt = lbann.SGD(learn_rate=learning_rate, momentum=0.9)

    # Load data reader from prototext
    data_reader_proto = lbann.lbann_pb2.LbannPB()
    with open(data_reader_file, 'r') as f:
        google.protobuf.text_format.Merge(f.read(), data_reader_proto)
    data_reader_proto = data_reader_proto.data_reader
    # Every reader loads its Python module from this file's directory.
    module_dir = os.path.dirname(os.path.realpath(__file__))
    for reader_proto in data_reader_proto.reader:
        reader_proto.python.module_dir = module_dir

    # Return experiment objects
    return model, data_reader_proto, opt
parser = argparse.ArgumentParser(description=desc) lbann.contrib.args.add_scheduler_arguments(parser) parser.add_argument('--job-name', action='store', default='lbann_lenet', type=str, help='scheduler job name (default: lbann_lenet)') args = parser.parse_args() # ---------------------------------- # Construct layer graph # ---------------------------------- # Input data input_ = lbann.Input(target_mode='classification') images = lbann.Identity(input_) labels = lbann.Identity(input_) # LeNet x = lbann.Convolution(images, num_dims=2, num_output_channels=6, num_groups=1, conv_dims_i=5, conv_strides_i=1, conv_dilations_i=1, has_bias=True) x = lbann.Relu(x) x = lbann.Pooling(x, num_dims=2, pool_dims_i=2,
def construct_model(run_args):
    """Construct LBANN model.

    Initial model for ATOM molecular VAE

    The objective is the KL and reconstruction terms returned by
    ``molvae.MolVAE``. Weight dumping, LTFB and learning-rate warm-up
    callbacks are added according to ``run_args``.

    Args:
        run_args: parsed arguments; reads pad_index, sequence_length,
            embedding_dim, num_embeddings, dump_weights_interval,
            dump_weights_dir, ltfb, weights_to_send, ltfb_batch_interval,
            warmup, lr, batch_size and num_epochs.

    Returns:
        lbann.Model

    Raises:
        AssertionError: if pad_index, sequence_length, embedding_dim or
            num_embeddings is None.
    """
    import lbann

    pad_index = run_args.pad_index
    assert pad_index is not None

    sequence_length = run_args.sequence_length
    assert sequence_length is not None

    print("sequence length is {}".format(sequence_length))

    # NOTE(review): data_layout is assigned but not used in this function.
    data_layout = "data_parallel"

    # Layer graph
    input_ = lbann.Identity(lbann.Input(name='inp', target_mode="N/A"),
                            name='inp1')
    vae_loss = []
    input_feature_dims = sequence_length

    embedding_size = run_args.embedding_dim
    dictionary_size = run_args.num_embeddings
    assert embedding_size is not None
    assert dictionary_size is not None

    kl, recon = molvae.MolVAE(input_feature_dims, dictionary_size,
                              embedding_size, pad_index)(input_)

    vae_loss.append(kl)
    vae_loss.append(recon)
    print("LEN vae loss ", len(vae_loss))

    layers = list(lbann.traverse_layer_graph(input_))
    # Setup objective function
    weights = set()
    for l in layers:
        weights.update(l.weights)
    # NOTE(review): l2_reg is built but never appended to vae_loss, so the
    # objective below has no L2 term - confirm whether that is intended.
    l2_reg = lbann.L2WeightRegularization(weights=weights, scale=5e-4)
    obj = lbann.ObjectiveFunction(vae_loss)

    # Initialize check metric callback
    metrics = [
        lbann.Metric(kl, name='kl_loss'),
        lbann.Metric(recon, name='recon')
    ]

    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]

    if (run_args.dump_weights_interval > 0):
        callbacks.append(
            lbann.CallbackDumpWeights(
                directory=run_args.dump_weights_dir,
                epoch_interval=run_args.dump_weights_interval))
    if (run_args.ltfb):
        # 'All' maps to the empty string, which is a substring of every
        # weight name, so every weight is selected.
        send_name = ('' if run_args.weights_to_send == 'All' else
                     run_args.weights_to_send)  #hack for Merlin empty string
        weights_to_ex = [w.name for w in weights if send_name in w.name]
        print("LTFB Weights to exchange ", weights_to_ex)
        callbacks.append(
            lbann.CallbackLTFB(batch_interval=run_args.ltfb_batch_interval,
                               metric='recon',
                               weights=list2str(weights_to_ex),
                               low_score_wins=True,
                               exchange_hyperparameters=True))

    if (run_args.warmup):
        # Target learning rate is scaled by batch_size / 512
        callbacks.append(
            lbann.CallbackLinearGrowthLearningRate(target=run_args.lr / 512 *
                                                   run_args.batch_size,
                                                   num_epochs=5))

    # Construct model
    return lbann.Model(run_args.num_epochs,
                       weights=weights,
                       layers=layers,
                       objective_function=obj,
                       metrics=metrics,
                       callbacks=callbacks)
def construct_model(run_args):
    """Construct LBANN model.

    Initial model for ATOM molecular VAE

    Encoder-only evaluation graph: the input is embedded, passed through
    ``waemodel.forward_encoder`` and the input concatenated with the latent
    output is dumped in 'test' mode. The objective is a placeholder
    (mean absolute error between the latent output and a Gaussian sample).

    Args:
        run_args: parsed arguments; reads dump_model_dir, pad_index,
            sequence_length, embedding_dim, num_embeddings,
            dump_outputs_dir, dump_outputs_interval and num_epochs.

    Returns:
        lbann.Model

    Raises:
        AssertionError: if dump_model_dir is falsy or pad_index,
            sequence_length, embedding_dim or num_embeddings is None.
    """
    import lbann

    print("Dump model dir ", run_args.dump_model_dir)
    assert run_args.dump_model_dir, "evaluate script asssumes a pretrained WAE model"
    pad_index = run_args.pad_index
    assert pad_index is not None

    sequence_length = run_args.sequence_length
    assert sequence_length is not None

    print("sequence length is {}".format(sequence_length))

    # NOTE(review): data_layout is assigned but not used in this function.
    data_layout = "data_parallel"

    # Layer graph
    input_ = lbann.Identity(lbann.Input(name='inp', target_mode="N/A"),
                            name='inp1')
    wae_loss = []
    input_feature_dims = sequence_length

    embedding_size = run_args.embedding_dim
    dictionary_size = run_args.num_embeddings
    assert embedding_size is not None
    assert dictionary_size is not None

    save_output = False

    print("save output? ", save_output, "out dir ",
          run_args.dump_outputs_dir)
    # NOTE(review): latent size is hard-coded to 128 here - confirm it
    # matches the pretrained model.
    z = lbann.Gaussian(mean=0.0, stdev=1.0, neuron_dims="128")

    x = lbann.Slice(input_, slice_points=str_list([0, input_feature_dims]))
    x = lbann.Identity(x)
    # NOTE(review): save_output is passed as the fifth positional argument;
    # other call sites of MolWAE pass z_dim in that position - verify
    # against the MolWAE signature in use.
    waemodel = molwae.MolWAE(input_feature_dims, dictionary_size,
                             embedding_size, pad_index, save_output)
    x_emb = lbann.Embedding(x,
                            num_embeddings=waemodel.dictionary_size,
                            embedding_dim=waemodel.embedding_size,
                            name='emb',
                            weights=waemodel.emb_weights)

    latentz = waemodel.forward_encoder(x_emb)

    # Placeholder objective so that the model has a loss to evaluate
    fake_loss = lbann.MeanAbsoluteError(latentz, z)

    # Snapshot of the graph at this point; conc_out is created afterwards
    # and is not part of this list.
    layers = list(lbann.traverse_layer_graph(input_))
    # Setup objective function
    weights = set()
    for l in layers:
        weights.update(l.weights)

    obj = lbann.ObjectiveFunction(fake_loss)

    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]

    #Dump output (activation) for post processing
    conc_out = lbann.Concatenation([input_, latentz], name='conc_out')
    callbacks.append(
        lbann.CallbackDumpOutputs(
            batch_interval=run_args.dump_outputs_interval,
            execution_modes='test',
            directory=run_args.dump_outputs_dir,
            layers=f'{conc_out.name}'))
    # Construct model
    return lbann.Model(run_args.num_epochs,
                       weights=weights,
                       layers=layers,
                       objective_function=obj,
                       callbacks=callbacks)
name='decoder_embeddings', ) # ---------------------------------- # Construct layer graph # ---------------------------------- # Properties of graph and random walk num_graph_nodes = dataset.max_graph_node_id() + 1 walk_length = dataset.walk_context_length num_negative_samples = dataset.num_negative_samples input_size = dataset.sample_dims()[0] # Embedding vectors, including negative sampling # Note: Input is sequence of graph node IDs input_ = lbann.Identity(lbann.Input()) input_slice = lbann.Slice( input_, slice_points=f'0 {num_negative_samples+1} {input_size}' ) decoder_embeddings = lbann.Embedding( input_slice, weights=decoder_embeddings_weights, num_embeddings=num_graph_nodes, embedding_dim=args.latent_dim, ) encoder_embeddings = lbann.Embedding( input_slice, weights=encoder_embeddings_weights, num_embeddings=num_graph_nodes, embedding_dim=args.latent_dim,
embeddings = lbann.Embedding( input_, weights=embeddings_weights, num_embeddings=num_vertices, embedding_dim=args.latent_dim, ) else: raise RuntimeError( f'unknown method to get embedding vectors ({args.embeddings})') embeddings_slice = lbann.Slice( embeddings, axis=0, slice_points=utils.str_list( [0, num_negative_samples, num_negative_samples + walk_length]), ) negative_samples_embeddings = lbann.Identity(embeddings_slice) walk_embeddings = lbann.Identity(embeddings_slice) # Skip-Gram objective function positive_loss = model.skip_gram.positive_samples_loss( walk_length, lbann.Identity(walk_embeddings), lbann.Identity(walk_embeddings), scale_decay=0.8, ) negative_loss = model.skip_gram.negative_samples_loss( walk_embeddings, negative_samples_embeddings, ) obj.append(positive_loss) obj.append(lbann.WeightedSum(negative_loss, scaling_factors='2'))
def compute_loss(self, x, y):
    """Build the masked cross-entropy reconstruction loss as LBANN layers.

    ``y`` (predictions) is truncated to its first
    ``self.input_feature_dims - 1`` positions and ``x`` (target indices) to
    positions ``1 .. self.input_feature_dims``, so each prediction is
    compared with the following target. Targets equal to
    ``self.label_to_ignore`` are excluded, and the summed loss is divided
    by the number of kept targets (at least 1).

    :param x: layer of target indices
    :param y: layer of decoder outputs (presumably unnormalized scores of
        shape (input_feature_dims, dictionary_size) - TODO confirm)
    :return: layer of dims ``[1]`` holding the averaged loss
    """

    # y[:, :-1]
    y = lbann.Slice(
        y,
        axis=0,
        slice_points=str_list([0, self.input_feature_dims - 1]),
    )
    y = lbann.Identity(y)

    # x[:, 1:]
    x = lbann.Slice(
        x,
        slice_points=str_list([1, self.input_feature_dims]),
    )
    x = lbann.Identity(x)

    # Convert indices in x to one-hot representation
    # Note: Ignored indices result in zero vectors
    ignore_mask = lbann.Equal(
        x,
        self.constant(self.label_to_ignore, hint_layer=x),
    )
    keep_mask = lbann.LogicalNot(ignore_mask)
    # Number of kept targets, floored at 1 to avoid dividing by zero
    length = lbann.Reduction(keep_mask, mode='sum')
    length = lbann.Max(length, self.constant(1, [1]))
    # Replace ignored indices with -1 and keep the others unchanged
    x = lbann.Add(
        lbann.Multiply(keep_mask, x),
        lbann.Multiply(ignore_mask, self.constant(-1, hint_layer=x)),
    )
    # Split into one entry per position, one-hot each, then stack as rows
    x = lbann.Slice(x, slice_points=str_list(range(self.input_feature_dims)))
    x = [lbann.Identity(x) for _ in range(self.input_feature_dims - 1)]
    x = [lbann.OneHot(xi, size=self.dictionary_size) for xi in x]
    x = [
        lbann.Reshape(xi, dims=str_list([1, self.dictionary_size]))
        for xi in x
    ]
    x = lbann.Concatenation(x, axis=0)

    # recon_loss = F.cross_entropy(
    #     y[:, :-1].contiguous().view(-1, y.size(-1)),
    #     x[:, 1:].contiguous().view(-1),
    #     ignore_index=self.pad
    # )
    # Note: Ideally we'd shift y by y.max(-1) for numerical stability
    # Stand-in shift: positive part of y multiplied by a constant matrix
    # whose entries are all 1/sqrt(dictionary_size)
    # (presumably self.constant(value, dims) builds a constant of that
    # shape - verify against its definition).
    shifts = lbann.MatMul(
        lbann.Max(y, self.constant(0, hint_layer=y)),
        self.constant(
            1 / math.sqrt(self.dictionary_size),
            [self.dictionary_size, self.dictionary_size],
        ),
    )
    y = lbann.Subtract(y, shifts)
    # Log of the per-position sum of exp(y)
    z = lbann.MatMul(
        lbann.Exp(y),
        self.constant(1, [self.dictionary_size, 1]),
    )
    z = lbann.Log(z)
    # Sum the log-sum-exp terms over kept positions only
    z = lbann.MatMul(
        lbann.Reshape(keep_mask, dims=str_list([1, -1])),
        z,
    )
    # Inner product of flattened y and the flattened one-hot targets
    recon_loss = lbann.MatMul(
        lbann.Reshape(y, dims=str_list([1, -1])),
        lbann.Reshape(x, dims=str_list([1, -1])),
        transpose_b=True,
    )
    recon_loss = lbann.Subtract(z, recon_loss)
    recon_loss = lbann.Reshape(recon_loss, dims=str_list([1]))
    recon_loss = lbann.Divide(recon_loss, length)

    return recon_loss