def setup_experiment(lbann): """Construct LBANN experiment. args: lbann (module): Module for LBANN Python frontend """ trainer = lbann.Trainer(mini_batch_size=mini_batch_size) callbacks = [ lbann.CallbackPrint(), lbann.CallbackTimer(), lbann.CallbackGPUMemoryUsage() ] model = Sparse_Graph_Trainer.make_model(kernel_type='GatedGraph', num_epochs=num_epochs, callbacks=callbacks) reader = data.PROTEINS.make_data_reader() # No validation set optimizer = lbann.Adam(learn_rate=0.01, beta1=0.9, beta2=0.99, eps=1e-8) return trainer, model, reader, optimizer
def make_model( motif_size, walk_length, num_vertices, embed_dim, learn_rate, num_epochs, embeddings_dir, ): # Layer graph input_ = lbann.Slice( lbann.Input(data_field='samples'), slice_points=str_list([0, motif_size, motif_size+walk_length]), ) motif_indices = lbann.Identity(input_) walk_indices = lbann.Identity(input_) gan = model.gan.CommunityGAN( num_vertices, motif_size, embed_dim, learn_rate, ) loss, real_disc_prob, fake_disc_prob, gen_prob = gan( motif_indices, motif_size, walk_indices, walk_length, ) # Metrics metrics = [ lbann.Metric(real_disc_prob, name='D(real)'), lbann.Metric(fake_disc_prob, name='D(fake)'), lbann.Metric(gen_prob, name='G'), ] # Callbacks callbacks = [ lbann.CallbackPrint(), lbann.CallbackTimer(), lbann.CallbackDumpWeights(directory=embeddings_dir, epoch_interval=num_epochs), ] # Perform computation at double precision for l in lbann.traverse_layer_graph(input_): l.datatype = lbann.DataType.DOUBLE for w in l.weights: w.datatype = lbann.DataType.DOUBLE # Contruct model return lbann.Model( num_epochs, layers=lbann.traverse_layer_graph(input_), objective_function=loss, metrics=metrics, callbacks=callbacks, )
def set_up_experiment(args, input_, probs, labels): # Set up objective function cross_entropy = lbann.CrossEntropy([probs, labels]) layers = list(lbann.traverse_layer_graph(input_)) weights = set() for l in layers: weights.update(l.weights) # scale = weight decay l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4) objective_function = lbann.ObjectiveFunction([cross_entropy, l2_reg]) # Set up model top1 = lbann.CategoricalAccuracy([probs, labels]) top5 = lbann.TopKCategoricalAccuracy([probs, labels], k=5) metrics = [lbann.Metric(top1, name='top-1 accuracy', unit='%'), lbann.Metric(top5, name='top-5 accuracy', unit='%')] callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer(), lbann.CallbackDropFixedLearningRate( drop_epoch=[30, 60], amt=0.1)] model = lbann.Model(args.mini_batch_size, args.num_epochs, layers=layers, weights=weights, objective_function=objective_function, metrics=metrics, callbacks=callbacks) # Load data reader from prototext data_reader_proto = lbann.lbann_pb2.LbannPB() with open(args.data_reader, 'r') as f: txtf.Merge(f.read(), data_reader_proto) data_reader_proto = data_reader_proto.data_reader # Set up optimizer if args.optimizer == 'sgd': print('Creating sgd optimizer') optimizer = lbann.optimizer.SGD( learn_rate=args.optimizer_learning_rate, momentum=0.9, nesterov=True ) else: optimizer = lbann.contrib.args.create_optimizer(args) # Save prototext to args.prototext if args.prototext: lbann.proto.save_prototext(args.prototext, model=model, optimizer=optimizer, data_reader=data_reader_proto) return model, data_reader_proto, optimizer
def construct_model(): """Model description """ import lbann import lbann.modules fc = lbann.modules.FullyConnectedModule conv = lbann.modules.Convolution2dModule conv1 = conv(20, 3, stride=1, padding=1, name='conv1') conv2 = conv(20, 3, stride=1, padding=1, name='conv2') fc1 = fc(100, name='fc1') fc2 = fc(20, name='fc2') fc3 = fc(num_classes, name='fc3') # Layer graph input = lbann.Input(name='inp_tensor', target_mode='classification') inp_slice = lbann.Slice(input, axis=0, slice_points=str_list([0, dims - 1, dims]), name='inp_slice') xdata = lbann.Identity(inp_slice) ylabel = lbann.Identity(inp_slice, name='gt_y') #NHWC to NCHW x = lbann.Reshape(xdata, dims='14 13 13') x = conv2(conv1(x)) x = lbann.Reshape(x, dims='3380') x = lbann.Dropout(lbann.Relu(fc1(x)), keep_prob=0.5) x = lbann.Dropout(fc2(x), keep_prob=0.5) pred = lbann.Softmax(fc3(x)) gt_label = lbann.OneHot(ylabel, size=num_classes) loss = lbann.CrossEntropy([pred, gt_label], name='loss') acc = lbann.CategoricalAccuracy([pred, gt_label]) layers = list(lbann.traverse_layer_graph(input)) # Setup objective function weights = set() for l in layers: weights.update(l.weights) obj = lbann.ObjectiveFunction(loss) callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()] # Construct model num_epochs = 10 return lbann.Model(num_epochs, weights=weights, layers=layers, metrics=[lbann.Metric(acc, name='accuracy', unit='%')], objective_function=obj, callbacks=callbacks)
def set_up_experiment(args, input_, probs, labels): # Set up objective function cross_entropy = lbann.CrossEntropy([probs, labels]) layers = list(lbann.traverse_layer_graph(input_)) l2_reg_weights = set() for l in layers: if type(l) == lbann.Convolution or type(l) == lbann.FullyConnected: l2_reg_weights.update(l.weights) # scale = weight decay l2_reg = lbann.L2WeightRegularization(weights=l2_reg_weights, scale=1e-4) objective_function = lbann.ObjectiveFunction([cross_entropy, l2_reg]) # Set up model top1 = lbann.CategoricalAccuracy([probs, labels]) top5 = lbann.TopKCategoricalAccuracy([probs, labels], k=5) metrics = [ lbann.Metric(top1, name='top-1 accuracy', unit='%'), lbann.Metric(top5, name='top-5 accuracy', unit='%') ] callbacks = [ lbann.CallbackPrint(), lbann.CallbackTimer(), lbann.CallbackDropFixedLearningRate(drop_epoch=[30, 60], amt=0.1) ] model = lbann.Model(args.num_epochs, layers=layers, objective_function=objective_function, metrics=metrics, callbacks=callbacks) # Set up data reader data_reader = data.imagenet.make_data_reader(num_classes=args.num_classes) # Set up optimizer if args.optimizer == 'sgd': print('Creating sgd optimizer') optimizer = lbann.optimizer.SGD( learn_rate=args.optimizer_learning_rate, momentum=0.9, nesterov=True) else: optimizer = lbann.contrib.args.create_optimizer(args) # Setup trainer trainer = lbann.Trainer(mini_batch_size=args.mini_batch_size) return trainer, model, data_reader, optimizer
def construct_model(): """Construct LBANN model. Pilot1 Combo model """ import lbann # Layer graph data = lbann.Input(data_field='samples') responses = lbann.Input(data_field='responses') pred = combo.Combo()(data) mse = lbann.MeanSquaredError([responses, pred]) SS_res = lbann.Reduction(lbann.Square(lbann.Subtract(responses, pred)), mode='sum') #SS_tot = var(x) = mean((x-mean(x))^2) mini_batch_size = lbann.MiniBatchSize() mean = lbann.Divide(lbann.BatchwiseReduceSum(responses), mini_batch_size) SS_tot = lbann.Divide( lbann.BatchwiseReduceSum(lbann.Square(lbann.Subtract(responses, mean))), mini_batch_size) eps = lbann.Constant(value=1e-07, hint_layer=SS_tot) r2 = lbann.Subtract(lbann.Constant(value=1, num_neurons='1'), lbann.Divide(SS_res, lbann.Add(SS_tot, eps))) metrics = [lbann.Metric(mse, name='mse')] metrics.append(lbann.Metric(r2, name='r2')) callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()] # Construct model num_epochs = 100 layers = list(lbann.traverse_layer_graph([data, responses])) return lbann.Model(num_epochs, layers=layers, metrics=metrics, objective_function=mse, callbacks=callbacks)
def make_model( data_dim, latent_dim, num_epochs, ): # Layer graph data = lbann.Input(data_field='samples') autoencoder = model.autoencoder.FullyConnectedAutoencoder( data_dim, latent_dim, ) reconstructed = autoencoder(data) loss = lbann.MeanSquaredError(data, reconstructed) # Metrics metrics = [ lbann.Metric(loss, name='loss'), ] # Callbacks callbacks = [ lbann.CallbackPrint(), lbann.CallbackTimer(), lbann.CallbackDumpWeights(directory='weights', epoch_interval=num_epochs), ] # Contruct model return lbann.Model( num_epochs, layers=lbann.traverse_layer_graph(loss), objective_function=loss, metrics=metrics, callbacks=callbacks, )
def construct_model(): """Construct LBANN model. ExaGAN model """ import lbann # Layer graph input = lbann.Input(target_mode='N/A',name='inp_img') #label flipping label_flip_rand = lbann.Uniform(min=0,max=1, neuron_dims='1') label_flip_prob = lbann.Constant(value=0.01, num_neurons='1') one = lbann.GreaterEqual(label_flip_rand,label_flip_prob, name='is_real') zero = lbann.LogicalNot(one,name='is_fake') z = lbann.Reshape(lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims="64", name='noise_vec'),dims='1 64') d1_real, d1_fake, d_adv, gen_img = ExaGAN.CosmoGAN()(input,z) d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real,one],name='d1_real_bce') d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake,zero],name='d1_fake_bce') d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv,one],name='d_adv_bce') layers = list(lbann.traverse_layer_graph(input)) # Setup objective function weights = set() src_layers = [] dst_layers = [] for l in layers: if(l.weights and "disc1" in l.name and "instance1" in l.name): src_layers.append(l.name) #freeze weights in disc2, analogous to discrim.trainable=False in Keras if(l.weights and "disc2" in l.name): dst_layers.append(l.name) for idx in range(len(l.weights)): l.weights[idx].optimizer = lbann.NoOptimizer() weights.update(l.weights) #l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4) obj = lbann.ObjectiveFunction([d1_real_bce,d1_fake_bce,d_adv_bce]) # Initialize check metric callback metrics = [lbann.Metric(d1_real_bce,name='d_real'), lbann.Metric(d1_fake_bce, name='d_fake'), lbann.Metric(d_adv_bce,name='gen')] callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer(), #Uncomment to dump output for plotting and further statistical analysis #lbann.CallbackDumpOutputs(layers='inp_img gen_img_instance1_activation', # execution_modes='train validation', # directory='dump_outs', # batch_interval=100, # format='npy'), lbann.CallbackReplaceWeights(source_layers=list2str(src_layers), destination_layers=list2str(dst_layers), batch_interval=2)] # Construct model num_epochs = 20 return lbann.Model(num_epochs, weights=weights, layers=layers, metrics=metrics, objective_function=obj, callbacks=callbacks)
def construct_model(run_args): """Construct LBANN model. Initial model for ATOM molecular VAE """ import lbann pad_index = run_args.pad_index assert pad_index is not None sequence_length = run_args.sequence_length assert sequence_length is not None print("sequence length is {}".format(sequence_length)) data_layout = "data_parallel" # Layer graph input_ = lbann.Identity(lbann.Input(name='inp', target_mode="N/A"), name='inp1') vae_loss = [] input_feature_dims = sequence_length embedding_size = run_args.embedding_dim dictionary_size = run_args.num_embeddings assert embedding_size is not None assert dictionary_size is not None kl, recon = molvae.MolVAE(input_feature_dims, dictionary_size, embedding_size, pad_index)(input_) vae_loss.append(kl) vae_loss.append(recon) print("LEN vae loss ", len(vae_loss)) layers = list(lbann.traverse_layer_graph(input_)) # Setup objective function weights = set() for l in layers: weights.update(l.weights) l2_reg = lbann.L2WeightRegularization(weights=weights, scale=5e-4) obj = lbann.ObjectiveFunction(vae_loss) # Initialize check metric callback metrics = [ lbann.Metric(kl, name='kl_loss'), lbann.Metric(recon, name='recon') ] callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()] if (run_args.dump_weights_interval > 0): callbacks.append( lbann.CallbackDumpWeights( directory=run_args.dump_weights_dir, epoch_interval=run_args.dump_weights_interval)) if (run_args.ltfb): send_name = ('' if run_args.weights_to_send == 'All' else run_args.weights_to_send) #hack for Merlin empty string weights_to_ex = [w.name for w in weights if send_name in w.name] print("LTFB Weights to exchange ", weights_to_ex) callbacks.append( lbann.CallbackLTFB(batch_interval=run_args.ltfb_batch_interval, metric='recon', weights=list2str(weights_to_ex), low_score_wins=True, exchange_hyperparameters=True)) if (run_args.warmup): callbacks.append( lbann.CallbackLinearGrowthLearningRate(target=run_args.lr / 512 * run_args.batch_size, num_epochs=5)) # Construct model return lbann.Model(run_args.num_epochs, weights=weights, layers=layers, objective_function=obj, metrics=metrics, callbacks=callbacks)
def construct_model(): """Construct MACC surrogate model. See https://arxiv.org/pdf/1912.08113.pdf model architecture and other details """ import lbann # Layer graph input = lbann.Input(data_field='samples', name='inp_data') # data is 64*64*4 images + 15 scalar + 5 param inp_slice = lbann.Slice(input, axis=0, slice_points=str_list( [0, args.ydim, args.ydim + args.xdim]), name='inp_slice') gt_y = lbann.Identity(inp_slice, name='gt_y') gt_x = lbann.Identity(inp_slice, name='gt_x') #param not used zero = lbann.Constant(value=0.0, num_neurons='1', name='zero') one = lbann.Constant(value=1.0, num_neurons='1', name='one') z = lbann.Gaussian(mean=0.0, stdev=1.0, neuron_dims="20") wae = macc_models.MACCWAE(args.zdim, args.ydim, cf=args.wae_mcf, use_CNN=args.useCNN) #pretrained, freeze inv = macc_models.MACCInverse(args.xdim, cf=args.surrogate_mcf) fwd = macc_models.MACCForward(args.zdim, cf=args.surrogate_mcf) y_pred_fwd = wae.encoder(gt_y) param_pred_ = wae.encoder(gt_y) input_fake = inv(param_pred_) output_cyc = fwd(input_fake) y_image_re2 = wae.decoder(output_cyc) '''**** Train cycleGAN input params <--> latent space of (images, scalars) ****''' output_fake = fwd(gt_x) y_image_re = wae.decoder(output_fake) y_out = wae.decoder(y_pred_fwd) param_pred2_ = wae.encoder(y_image_re) input_cyc = inv(param_pred2_) L_l2_x = lbann.MeanSquaredError( input_fake, gt_x) #(x,inv(enc(y)), (encoder+)inverse loss L_cyc_x = lbann.MeanSquaredError( input_cyc, gt_x) #param, x cycle loss, from latent space L_l2_y = lbann.MeanSquaredError( output_fake, y_pred_fwd) #pred error into latent space (enc(y),fw(x)) L_cyc_y = lbann.MeanSquaredError( output_cyc, y_pred_fwd) # pred error into latent space (enc(y), fw(inv(enc(y)))) #@todo slice here to separate scalar from image img_sca_loss = lbann.MeanSquaredError( y_image_re, gt_y) # (y,dec(fw(x))) #forward model to decoder, no latent space dec_fw_inv_enc_y = lbann.MeanSquaredError( y_image_re2, gt_y) #(y, dec(fw(inv(enc(y))))) y->enc_z->x'->fw_z->y' wae_loss = lbann.MeanSquaredError(y_out, gt_y) #(y, dec(enc(y)) ' #L_cyc = L_cyc_y + L_cyc_x L_cyc = lbann.Add(L_cyc_y, L_cyc_x) #loss_gen0 = L_l2_y + lamda_cyc*L_cyc loss_gen0 = lbann.WeightedSum([L_l2_y, L_cyc], scaling_factors=f'1 {args.lamda_cyc}') loss_gen1 = lbann.WeightedSum([L_l2_x, L_cyc_y], scaling_factors=f'1 {args.lamda_cyc}') #loss_gen1 = L_l2_x + lamda_cyc*L_cyc_y conc_out = lbann.Concatenation( [gt_x, wae_loss, img_sca_loss, dec_fw_inv_enc_y, L_l2_x], name='x_errors') layers = list(lbann.traverse_layer_graph(input)) weights = set() for l in layers: weights.update(l.weights) # Setup objective function obj = lbann.ObjectiveFunction([loss_gen0, loss_gen1]) # Initialize check metric callback metrics = [ lbann.Metric(img_sca_loss, name='img_re1'), lbann.Metric(dec_fw_inv_enc_y, name='img_re2'), lbann.Metric(wae_loss, name='wae_loss'), lbann.Metric(L_l2_x, name='inverse loss'), lbann.Metric(L_cyc_y, name='output cycle loss'), lbann.Metric(L_cyc_x, name='param cycle loss') ] callbacks = [ lbann.CallbackPrint(), lbann.CallbackDumpOutputs(layers=f'{conc_out.name}', execution_modes='test', directory=args.dump_outputs, batch_interval=1, format='npy'), lbann.CallbackTimer() ] # Construct model num_epochs = 1 return lbann.Model(num_epochs, weights=weights, layers=layers, serialize_io=True, metrics=metrics, objective_function=obj, callbacks=callbacks)
def construct_model(run_args): """Construct LBANN model. Initial model for ATOM molecular VAE """ import lbann print("Dump model dir ", run_args.dump_model_dir) assert run_args.dump_model_dir, "evaluate script asssumes a pretrained WAE model" pad_index = run_args.pad_index assert pad_index is not None sequence_length = run_args.sequence_length assert sequence_length is not None print("sequence length is {}".format(sequence_length)) data_layout = "data_parallel" # Layer graph input_ = lbann.Identity(lbann.Input(name='inp', target_mode="N/A"), name='inp1') wae_loss = [] input_feature_dims = sequence_length embedding_size = run_args.embedding_dim dictionary_size = run_args.num_embeddings assert embedding_size is not None assert dictionary_size is not None save_output = False print("save output? ", save_output, "out dir ", run_args.dump_outputs_dir) z = lbann.Gaussian(mean=0.0, stdev=1.0, neuron_dims="128") x = lbann.Slice(input_, slice_points=str_list([0, input_feature_dims])) x = lbann.Identity(x) waemodel = molwae.MolWAE(input_feature_dims, dictionary_size, embedding_size, pad_index, save_output) x_emb = lbann.Embedding(x, num_embeddings=waemodel.dictionary_size, embedding_dim=waemodel.embedding_size, name='emb', weights=waemodel.emb_weights) latentz = waemodel.forward_encoder(x_emb) fake_loss = lbann.MeanAbsoluteError(latentz, z) layers = list(lbann.traverse_layer_graph(input_)) # Setup objective function weights = set() for l in layers: weights.update(l.weights) obj = lbann.ObjectiveFunction(fake_loss) callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()] #Dump output (activation) for post processing conc_out = lbann.Concatenation([input_, latentz], name='conc_out') callbacks.append( lbann.CallbackDumpOutputs( batch_interval=run_args.dump_outputs_interval, execution_modes='test', directory=run_args.dump_outputs_dir, layers=f'{conc_out.name}')) # Construct model return lbann.Model(run_args.num_epochs, weights=weights, layers=layers, objective_function=obj, callbacks=callbacks)
def construct_model(num_epochs, mcr, save_batch_interval=82): """Construct LBANN model. """ import lbann # Layer graph input = lbann.Input(target_mode='N/A', name='inp_img') #============================================== ##Create the noise vector z = lbann.Reshape(lbann.Gaussian(mean=0.0, stdev=1.0, neuron_dims="64", name='noise_vec'), dims='1 64') ## Creating the GAN object and implementing forward pass for both networks ### gen_img = model_GAN.CosmoGAN(mcr)(input, z, mcr) # #============================================== # ### Set up source and destination layers # layers = list(lbann.traverse_layer_graph(input)) # weights = set() # src_layers,dst_layers = [],[] # for l in layers: # if(l.weights and "disc1" in l.name and "instance1" in l.name): # src_layers.append(l.name) # #freeze weights in disc2, analogous to discrim.trainable=False in Keras # if(l.weights and "disc2" in l.name): # dst_layers.append(l.name) # for idx in range(len(l.weights)): # l.weights[idx].optimizer = lbann.NoOptimizer() # weights.update(l.weights) # #============================================== # ### Define Loss and Metrics # #Define loss (Objective function) # loss_list=[d1_real_bce,d1_fake_bce,d_adv_bce] ## Usual GAN loss function # # loss_list.append(l2_reg) # loss = lbann.ObjectiveFunction(loss_list) # #Define metrics # metrics = [lbann.Metric(d1_real_bce,name='d_real'),lbann.Metric(d1_fake_bce, name='d_fake'), lbann.Metric(d_adv_bce,name='gen'), # #lbann.Metric(img_loss, name='msq_error') ,lbann.Metric(l1_loss, name='l1norm_error') # # ,lbann.Metric(l2_reg) # ] #============================================== ### Define callbacks list callbacks_list = [] print_model = False fname = '' callbacks_list.append(lbann.CallbackPrint()) callbacks_list.append(lbann.CallbackTimer()) callbacks_list.append(lbann.CallbackLoadModel(dirs=str(fname))) if print_model: callbacks_list.append(lbann.CallbackPrintModelDescription()) ### Construct model return lbann.Model( num_epochs, # weights=weights, # layers=layers, # metrics=metrics, # objective_function=loss, callbacks=callbacks_list)
def construct_model(num_epochs,mcr,spectral_loss,save_batch_interval): """Construct LBANN model. """ import lbann # Layer graph input = lbann.Input(target_mode='N/A',name='inp_img') ### Create expected labels for real and fake data (with label flipping = 0.01) prob_flip=0.01 label_flip_rand = lbann.Uniform(min=0,max=1, neuron_dims='1') label_flip_prob = lbann.Constant(value=prob_flip, num_neurons='1') ones = lbann.GreaterEqual(label_flip_rand,label_flip_prob, name='is_real') zeros = lbann.LogicalNot(ones,name='is_fake') gen_ones=lbann.Constant(value=1.0,num_neurons='1')## All ones: no flip. Input for training Generator. #============================================== ### Implement GAN ##Create the noise vector z = lbann.Reshape(lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims="64", name='noise_vec'),dims='1 64') ## Creating the GAN object and implementing forward pass for both networks ### d1_real, d1_fake, d_adv, gen_img, img = ExaGAN.CosmoGAN(mcr)(input,z,mcr) #============================================== ### Compute quantities for adding to Loss and Metrics d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real,ones],name='d1_real_bce') d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake,zeros],name='d1_fake_bce') d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv,gen_ones],name='d_adv_bce') #img_loss = lbann.MeanSquaredError([gen_img,img]) #l1_loss = lbann.L1Norm(lbann.WeightedSum([gen_img,img], scaling_factors="1 -1")) #============================================== ### Set up source and destination layers layers = list(lbann.traverse_layer_graph(input)) weights = set() src_layers,dst_layers = [],[] for l in layers: if(l.weights and "disc1" in l.name and "instance1" in l.name): src_layers.append(l.name) #freeze weights in disc2, analogous to discrim.trainable=False in Keras if(l.weights and "disc2" in l.name): dst_layers.append(l.name) for idx in range(len(l.weights)): l.weights[idx].optimizer = lbann.NoOptimizer() weights.update(l.weights) #l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4) #============================================== ### Define Loss and Metrics #Define loss (Objective function) loss_list=[d1_real_bce,d1_fake_bce,d_adv_bce] ## Usual GAN loss function # loss_list=[d1_real_bce,d1_fake_bce] ## skipping adversarial loss for G for testing spectral loss if spectral_loss: dft_gen_img = lbann.DFTAbs(gen_img) dft_img = lbann.StopGradient(lbann.DFTAbs(img)) spec_loss = lbann.Log(lbann.MeanSquaredError(dft_gen_img, dft_img)) loss_list.append(lbann.LayerTerm(spec_loss, scale=8.0)) loss = lbann.ObjectiveFunction(loss_list) #Define metrics metrics = [lbann.Metric(d1_real_bce,name='d_real'),lbann.Metric(d1_fake_bce, name='d_fake'), lbann.Metric(d_adv_bce,name='gen_adv')] if spectral_loss: metrics.append(lbann.Metric(spec_loss,name='spec_loss')) #============================================== ### Define callbacks list callbacks_list=[] dump_outputs=True save_model=False print_model=False callbacks_list.append(lbann.CallbackPrint()) callbacks_list.append(lbann.CallbackTimer()) callbacks_list.append(lbann.CallbackReplaceWeights(source_layers=list2str(src_layers), destination_layers=list2str(dst_layers),batch_interval=1)) if dump_outputs: #callbacks_list.append(lbann.CallbackDumpOutputs(layers='inp_img gen_img_instance1_activation', execution_modes='train validation', directory='dump_outs',batch_interval=save_batch_interval,format='npy')) callbacks_list.append(lbann.CallbackDumpOutputs(layers='gen_img_instance1_activation', execution_modes='train validation', directory='dump_outs',batch_interval=save_batch_interval,format='npy')) if save_model : callbacks_list.append(lbann.CallbackSaveModel(dir='models')) if print_model: callbacks_list.append(lbann.CallbackPrintModelDescription()) ### Construct model return lbann.Model(num_epochs, weights=weights, layers=layers, metrics=metrics, objective_function=loss, callbacks=callbacks_list)
def construct_model(run_args): """Construct LBANN model. Initial model for ATOM molecular SMILES generation Network architecture and training hyperparameters from https://github.com/samadejacobs/moses/tree/master/moses/char_rnn """ pad_index = run_args.pad_index assert pad_index is not None sequence_length = run_args.sequence_length assert sequence_length is not None print("sequence length is {}".format(sequence_length)) data_layout = "data_parallel" # Layer graph _input = lbann.Input(name="inp_tensor", data_field='samples') print(sequence_length) x_slice = lbann.Slice( _input, axis=0, slice_points=str_list(range(sequence_length + 1)), name="inp_slice", ) # embedding layer emb = [] embedding_dim = run_args.embedding_dim num_embeddings = run_args.num_embeddings assert embedding_dim is not None assert num_embeddings is not None emb_weights = lbann.Weights( initializer=lbann.NormalInitializer(mean=0, standard_deviation=1), name="emb_matrix", ) lstm1 = lbann.modules.GRU(size=run_args.hidden, data_layout=data_layout) fc = lbann.modules.FullyConnectedModule(size=num_embeddings, data_layout=data_layout) last_output = lbann.Constant( value=0.0, num_neurons="{}".format(run_args.hidden), data_layout=data_layout, name="lstm_init_output", ) lstm1_prev_state = [last_output] loss = [] idl = [] for i in range(sequence_length): idl.append( lbann.Identity(x_slice, name="slice_idl_" + str(i), device="CPU")) for i in range(sequence_length - 1): emb_l = lbann.Embedding( idl[i], name="emb_" + str(i), weights=emb_weights, embedding_dim=embedding_dim, num_embeddings=num_embeddings, ) x, lstm1_prev_state = lstm1(emb_l, lstm1_prev_state) fc_l = fc(x) y_soft = lbann.Softmax(fc_l, name="soft_" + str(i)) gt = lbann.OneHot(idl[i + 1], size=num_embeddings) ce = lbann.CrossEntropy([y_soft, gt], name="loss_" + str(i)) # mask padding in input pad_mask = lbann.NotEqual( [idl[i], lbann.Constant(value=pad_index, num_neurons="1")], ) ce_mask = lbann.Multiply([pad_mask, ce], name="loss_mask_" + str(i)) loss.append(lbann.LayerTerm(ce_mask, scale=1 / (sequence_length - 1))) layers = list(lbann.traverse_layer_graph(_input)) # Setup objective function weights = set() for l in layers: weights.update(l.weights) obj = lbann.ObjectiveFunction(loss) callbacks = [ lbann.CallbackPrint(), lbann.CallbackTimer(), lbann.CallbackStepLearningRate(step=run_args.step_size, amt=run_args.gamma), lbann.CallbackDumpWeights(directory=run_args.dump_weights_dir, epoch_interval=1), ] # Construct model return lbann.Model(run_args.num_epochs, layers=layers, weights=weights, objective_function=obj, callbacks=callbacks)
def construct_model(run_args): """Construct LBANN model. Initial model for ATOM molecular VAE """ import lbann pad_index = run_args.pad_index assert pad_index is not None sequence_length = run_args.sequence_length assert sequence_length is not None, 'should be training seq len + bos + eos' print("sequence length is {}, which is training sequence len + bos + eos".format(sequence_length)) data_layout = "data_parallel" # Layer graph input_ = lbann.Input(data_field='samples',name='inp_data') #Note input assumes to come from encoder script concatenation of input smiles + z inp_slice = lbann.Slice(input_, axis=0, slice_points=str_list([0, sequence_length, sequence_length+run_args.z_dim]), name='inp_slice') inp_smile = lbann.Identity(inp_slice,name='inp_smile') z = lbann.Identity(inp_slice, name='z') wae_loss= [] input_feature_dims = sequence_length embedding_size = run_args.embedding_dim dictionary_size = run_args.num_embeddings assert embedding_size is not None assert dictionary_size is not None save_output = True if run_args.dump_outputs_dir else False print("save output? ", save_output, "out dir ", run_args.dump_outputs_dir) #uncomment below for random sampling #z = lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims=str(run_args.z_dim)) x = lbann.Slice(inp_smile, slice_points=str_list([0, input_feature_dims])) x = lbann.Identity(x) waemodel = molwae.MolWAE(input_feature_dims, dictionary_size, embedding_size, pad_index,run_args.z_dim,save_output=save_output) x_emb = lbann.Embedding( x, num_embeddings=waemodel.dictionary_size, embedding_dim=waemodel.embedding_size, name='emb', weights=waemodel.emb_weights ) pred, arg_max = waemodel.forward_decoder(x_emb,z) recon = waemodel.compute_loss(x, pred) wae_loss.append(recon) layers = list(lbann.traverse_layer_graph(input_)) # Setup objective function weights = set() for l in layers: weights.update(l.weights) #l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4) #wae_loss.append(l2_reg) print("LEN wae loss ", len(wae_loss)) obj = lbann.ObjectiveFunction(wae_loss) # Initialize check metric callback metrics = [lbann.Metric(recon, name='recon')] callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()] #Dump output (activation) for post processing pred_tensor = lbann.Concatenation(arg_max, name='pred_tensor') conc_out = lbann.Concatenation([input_,pred_tensor], name='conc_out') callbacks.append(lbann.CallbackDumpOutputs(batch_interval=run_args.dump_outputs_interval, execution_modes='test', directory=run_args.dump_outputs_dir, layers=f'{conc_out.name}')) # Construct model return lbann.Model(run_args.num_epochs, weights=weights, layers=layers, objective_function=obj, metrics=metrics, callbacks=callbacks)
def setup(num_patches=3, mini_batch_size=512, num_epochs=75, learning_rate=0.005, bn_statistics_group_size=2, fc_data_layout='model_parallel', warmup=True, checkpoint_interval=None): # Data dimensions patch_dims = patch_generator.patch_dims num_labels = patch_generator.num_labels(num_patches) # Extract tensors from data sample input = lbann.Input() slice_points = [0] for _ in range(num_patches): patch_size = functools.reduce(operator.mul, patch_dims) slice_points.append(slice_points[-1] + patch_size) slice_points.append(slice_points[-1] + num_labels) sample = lbann.Slice(input, slice_points=str_list(slice_points)) patches = [ lbann.Reshape(sample, dims=str_list(patch_dims)) for _ in range(num_patches) ] labels = lbann.Identity(sample) # Siamese network head_cnn = modules.ResNet( bn_statistics_group_size=bn_statistics_group_size) heads = [head_cnn(patch) for patch in patches] heads_concat = lbann.Concatenation(heads) # Classification network class_fc1 = modules.FcBnRelu( 4096, statistics_group_size=bn_statistics_group_size, name='siamese_class_fc1', data_layout=fc_data_layout) class_fc2 = modules.FcBnRelu( 4096, statistics_group_size=bn_statistics_group_size, name='siamese_class_fc2', data_layout=fc_data_layout) class_fc3 = lbann.modules.FullyConnectedModule(num_labels, activation=lbann.Softmax, name='siamese_class_fc3', data_layout=fc_data_layout) x = class_fc1(heads_concat) x = class_fc2(x) probs = class_fc3(x) # Setup objective function cross_entropy = lbann.CrossEntropy([probs, labels]) l2_reg_weights = set() for l in lbann.traverse_layer_graph(input): if type(l) == lbann.Convolution or type(l) == lbann.FullyConnected: l2_reg_weights.update(l.weights) l2_reg = lbann.L2WeightRegularization(weights=l2_reg_weights, scale=0.0002) obj = lbann.ObjectiveFunction([cross_entropy, l2_reg]) # Setup model metrics = [ lbann.Metric(lbann.CategoricalAccuracy([probs, labels]), name='accuracy', unit='%') ] callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()] if checkpoint_interval: callbacks.append( lbann.CallbackCheckpoint(checkpoint_dir='ckpt', checkpoint_epochs=5)) # Learning rate schedules if warmup: callbacks.append( lbann.CallbackLinearGrowthLearningRate(target=learning_rate * mini_batch_size / 128, num_epochs=5)) callbacks.append( lbann.CallbackDropFixedLearningRate(drop_epoch=list(range(0, 100, 15)), amt=0.25)) # Construct model model = lbann.Model(num_epochs, layers=lbann.traverse_layer_graph(input), objective_function=obj, metrics=metrics, callbacks=callbacks) # Setup optimizer opt = lbann.SGD(learn_rate=learning_rate, momentum=0.9) # opt = lbann.Adam(learn_rate=learning_rate, beta1=0.9, beta2=0.999, eps=1e-8) # Setup data reader data_reader = make_data_reader(num_patches) # Return experiment objects return model, data_reader, opt
def make_model( num_epochs, embed_dim, num_heads, label_smoothing, ): # Embedding weights var = 2 / (embed_dim + vocab_size) # Glorot initialization embedding_weights = lbann.Weights( name='embeddings', initializer=lbann.NormalInitializer(standard_deviation=math.sqrt(var)), ) # Input is two sequences of token IDs input_ = lbann.Input(data_field='samples') # Get sequences of embedding vectors # Note: Scale embeddings by sqrt(embed_dim). # Note: Decoder input is shifted right, so embedding for last # token isn't needed. embeddings_tokens = lbann.Identity( lbann.Slice( input_, axis=0, slice_points=str_list([0, 2 * sequence_length - 1]), )) embeddings = lbann.Embedding( embeddings_tokens, weights=embedding_weights, num_embeddings=vocab_size, embedding_dim=embed_dim, padding_idx=pad_index, ) embeddings = lbann.WeightedSum( embeddings, scaling_factors=str(math.sqrt(embed_dim)), ) embeddings_slice = lbann.Slice( embeddings, axis=0, slice_points=str_list([0, sequence_length, 2 * sequence_length - 1]), ) encoder_input = lbann.Identity(embeddings_slice) decoder_input = lbann.Identity(embeddings_slice) # Apply transformer model transformer = lbann.models.Transformer( hidden_size=embed_dim, num_heads=num_heads, name='transformer', ) result = transformer( encoder_input, sequence_length, decoder_input, sequence_length - 1, ) # Reconstruct decoder input preds = lbann.ChannelwiseFullyConnected( result, weights=embedding_weights, output_channel_dims=[vocab_size], bias=False, transpose=True, ) preds = lbann.ChannelwiseSoftmax(preds) preds = lbann.Slice(preds, axis=0, slice_points=str_list(range(sequence_length))) preds = [lbann.Identity(preds) for _ in range(sequence_length - 1)] # Count number of non-pad tokens label_tokens = lbann.Identity( lbann.Slice( input_, slice_points=str_list([sequence_length + 1, 2 * sequence_length]), )) pads = lbann.Constant(value=pad_index, num_neurons=str(sequence_length - 1)) is_not_pad = lbann.NotEqual(label_tokens, pads) num_not_pad = lbann.Reduction(is_not_pad, mode='sum') # Cross entropy loss with label smoothing label_tokens = lbann.Slice( label_tokens, slice_points=str_list(range(sequence_length)), ) label_tokens = [ lbann.Identity(label_tokens) for _ in range(sequence_length - 1) ] if label_smoothing > 0: uniform_label = lbann.Constant(value=1 / vocab_size, num_neurons=str_list([1, vocab_size])) loss = [] for i in range(sequence_length - 1): label = lbann.OneHot(label_tokens[i], size=vocab_size) label = lbann.Reshape(label, dims=str_list([1, vocab_size])) if label_smoothing > 0: label = lbann.WeightedSum( label, uniform_label, scaling_factors=str_list( [1 - label_smoothing, label_smoothing]), ) loss.append(lbann.CrossEntropy(preds[i], label)) loss = lbann.Concatenation(loss) # Average cross entropy over non-pad tokens loss_scales = lbann.Divide( is_not_pad, lbann.Tessellate(num_not_pad, hint_layer=is_not_pad), ) loss = lbann.Multiply(loss, loss_scales) loss = lbann.Reduction(loss, mode='sum') # Construct model metrics = [] callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()] return lbann.Model( num_epochs, layers=lbann.traverse_layer_graph(input_), objective_function=loss, metrics=metrics, callbacks=callbacks, )
def construct_model(): """Construct LBANN model. JAG Wasserstein autoencoder model """ import lbann # Layer graph input = lbann.Input(target_mode='N/A',name='inp_data') # data is 64*64*4 images + 15 scalar + 5 param inp_slice = lbann.Slice(input, axis=0, slice_points="0 16399 16404",name='inp_slice') gt_y = lbann.Identity(inp_slice,name='gt_y') gt_x = lbann.Identity(inp_slice, name='gt_x') #param not used zero = lbann.Constant(value=0.0,num_neurons='1',name='zero') one = lbann.Constant(value=1.0,num_neurons='1',name='one') y_dim = 16399 #image+scalar shape z_dim = 20 #Latent space dim z = lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims="20") d1_real, d1_fake, d_adv, pred_y = jag_models.WAE(z_dim,y_dim)(z,gt_y) d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real,one],name='d1_real_bce') d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake,zero],name='d1_fake_bce') d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv,one],name='d_adv_bce') img_loss = lbann.MeanSquaredError([pred_y,gt_y]) rec_error = lbann.L2Norm2(lbann.WeightedSum([pred_y,gt_y], scaling_factors="1 -1")) layers = list(lbann.traverse_layer_graph(input)) # Setup objective function weights = set() src_layers = [] dst_layers = [] for l in layers: if(l.weights and "disc0" in l.name and "instance1" in l.name): src_layers.append(l.name) #freeze weights in disc2 if(l.weights and "disc1" in l.name): dst_layers.append(l.name) for idx in range(len(l.weights)): l.weights[idx].optimizer = lbann.NoOptimizer() weights.update(l.weights) l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4) d_adv_bce = lbann.LayerTerm(d_adv_bce,scale=0.01) obj = lbann.ObjectiveFunction([d1_real_bce,d1_fake_bce,d_adv_bce,img_loss,rec_error,l2_reg]) # Initialize check metric callback metrics = [lbann.Metric(img_loss, name='recon_error')] callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer(), lbann.CallbackReplaceWeights(source_layers=list2str(src_layers), destination_layers=list2str(dst_layers), batch_interval=2)] # Construct model num_epochs = 100 return lbann.Model(num_epochs, weights=weights, layers=layers, metrics=metrics, objective_function=obj, callbacks=callbacks)
def make_model(num_vertices=None, node_features=None, num_classes=None, kernel_type='GCN', callbacks=None, num_epochs=1): '''Construct a model DAG using one of the Graph Kernels Args: num_vertices (int): Number of vertices of each graph (default: None) node_features (int): Number of features per noded (default: None) num_classes (int): Number of classes as targets (default: None) kernel_type (str): Graph Kernel to use in model. Expected one of GCN, GIN, Graph, or GatedGraph (deafult: GCN) callbacks (list): Callbacks for the model. If set to None the model description, GPU usage, training_output, and timer is reported. (default: None) num_epochs (int): Number of epochs to run (default: 1) Returns: (lbann.Model) : A model object with the supplied callbacks, dataset presets, and graph kernels. ''' num_vertices = 100 num_classes = 2 node_feature_size = 3 max_edges = 415 #---------------------------------- # Reshape and Slice Input Tensor #---------------------------------- input_ = lbann.Input(data_field='samples') # Input dimensions should be (num_vertices * node_features + num_vertices^2 + num_classes ) data = Graph_Data_Parser(input_, num_vertices, node_feature_size, max_edges, num_classes) feature_matrix = data['node_features'] source_indices = data['source_indices'] target_indices = data['target_indices'] target = data['target'] #---------------------------------- # Select Graph Convolution #---------------------------------- output_channels = 16 graph_kernel_op = None if kernel_type == 'GIN': graph_kernel_op = GINConvLayer elif kernel_type == 'GCN': graph_kernel_op = GCNConvLayer elif kernel_type == 'Graph': graph_kernel_op = GraphConvLayer elif kernel_type == 'GatedGraph': graph_kernel_op = GATConvLayer else: raise ValueError( 'Invalid Graph kernel specifier "{}" recieved. Expected one of:\ GIN,GCN,Graph or GatedGraph'.format(kernel_type)) #---------------------------------- # Perform Graph Convolution #---------------------------------- x = graph_kernel_op(feature_matrix, source_indices, target_indices, num_vertices, max_edges, node_feature_size, output_channels) #---------------------------------- # Apply Reduction on Node Features #---------------------------------- average_vector = lbann.Constant(value=1 / num_vertices, num_neurons=str_list([1, num_vertices]), name="Average_Vector") x = lbann.MatMul(average_vector, x, name="Node_Feature_Reduction") # X is now a vector with output_channel dimensions x = lbann.Reshape(x, dims=str_list([output_channels]), name="Squeeze") x = lbann.FullyConnected(x, num_neurons=64, name="hidden_layer_1") x = lbann.Relu(x, name="hidden_layer_1_activation") x = lbann.FullyConnected(x, num_neurons=num_classes, name="Output_Fully_Connected") #---------------------------------- # Loss Function and Accuracy s #---------------------------------- probs = lbann.Softmax(x, name="Softmax") loss = lbann.CrossEntropy(probs, target, name="Cross_Entropy_Loss") accuracy = lbann.CategoricalAccuracy(probs, target, name="Accuracy") layers = lbann.traverse_layer_graph(input_) if callbacks is None: print_model = lbann.CallbackPrintModelDescription( ) #Prints initial Model after Setup training_output = lbann.CallbackPrint( interval=1, print_global_stat_only=False) #Prints training progress gpu_usage = lbann.CallbackGPUMemoryUsage() timer = lbann.CallbackTimer() callbacks = [print_model, training_output, gpu_usage, timer] else: if isinstance(callbacks, list): callbacks = callbacks metrics = [lbann.Metric(accuracy, name='accuracy', unit="%")] model = lbann.Model(num_epochs, layers=layers, objective_function=loss, metrics=metrics, callbacks=callbacks) return model
def construct_model(run_args): """Construct LBANN model. Initial model for ATOM molecular VAE """ import lbann pad_index = run_args.pad_index assert pad_index is not None #sequence_length = run_args.sequence_length sequence_length = 102 assert sequence_length is not None print("sequence length is {}".format(sequence_length)) data_layout = "data_parallel" # Layer graph input_ = lbann.Input(target_mode='N/A',name='inp_data') inp_slice = lbann.Slice(input_, axis=0, slice_points="0 102 230",name='inp_slice') inp_smile = lbann.Identity(inp_slice,name='inp_smile') z = lbann.Identity(inp_slice, name='z') #param not used #input_ = lbann.Identity(lbann.Input(name='inp',target_mode="N/A"), name='inp1') vae_loss= [] input_feature_dims = sequence_length embedding_size = run_args.embedding_dim dictionary_size = run_args.num_embeddings assert embedding_size is not None assert dictionary_size is not None save_output = True if run_args.dump_outputs_dir else False #print("Inp smile len ", len(inp_smile), "z len ", len(z)) print("save output? ", save_output, "out dir ", run_args.dump_outputs_dir) #z = lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims="128") x = lbann.Slice(inp_smile, slice_points=str_list([0, input_feature_dims])) x = lbann.Identity(x) waemodel = molwae.MolWAE(input_feature_dims, dictionary_size, embedding_size, pad_index,save_output) x_emb = lbann.Embedding( x, num_embeddings=waemodel.dictionary_size, embedding_dim=waemodel.embedding_size, name='emb', weights=waemodel.emb_weights ) pred, arg_max = waemodel.forward_decoder(x_emb,z) recon = waemodel.compute_loss(x, pred) vae_loss.append(recon) layers = list(lbann.traverse_layer_graph(input_)) # Setup objective function weights = set() for l in layers: weights.update(l.weights) #l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4) #vae_loss.append(l2_reg) print("LEN vae loss ", len(vae_loss)) obj = lbann.ObjectiveFunction(vae_loss) # Initialize check metric callback metrics = [lbann.Metric(recon, name='recon')] callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()] #Dump output (activation) for post processing pred_tensor = lbann.Concatenation(arg_max, name='pred_tensor') conc_out = lbann.Concatenation([input_,pred_tensor], name='conc_out') callbacks.append(lbann.CallbackDumpOutputs(batch_interval=run_args.dump_outputs_interval, execution_modes='test', directory=run_args.dump_outputs_dir, layers=f'{conc_out.name}')) # Construct model return lbann.Model(run_args.num_epochs, weights=weights, layers=layers, objective_function=obj, metrics=metrics, callbacks=callbacks)
def setup(data_reader_file, name='classifier', num_labels=200, mini_batch_size=128, num_epochs=1000, learning_rate=0.1, bn_statistics_group_size=2, fc_data_layout='model_parallel', warmup_epochs=50, learning_rate_drop_interval=50, learning_rate_drop_factor=0.25, checkpoint_interval=None): # Setup input data input = lbann.Input(target_mode = 'classification') images = lbann.Identity(input) labels = lbann.Identity(input) # Classification network head_cnn = modules.ResNet(bn_statistics_group_size=bn_statistics_group_size) class_fc = lbann.modules.FullyConnectedModule(num_labels, activation=lbann.Softmax, name=f'{name}_fc', data_layout=fc_data_layout) x = head_cnn(images) probs = class_fc(x) # Setup objective function cross_entropy = lbann.CrossEntropy([probs, labels]) l2_reg_weights = set() for l in lbann.traverse_layer_graph(input): if type(l) == lbann.Convolution or type(l) == lbann.FullyConnected: l2_reg_weights.update(l.weights) l2_reg = lbann.L2WeightRegularization(weights=l2_reg_weights, scale=0.0002) obj = lbann.ObjectiveFunction([cross_entropy, l2_reg]) # Setup model metrics = [lbann.Metric(lbann.CategoricalAccuracy([probs, labels]), name='accuracy', unit='%')] callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()] if checkpoint_interval: callbacks.append( lbann.CallbackCheckpoint( checkpoint_dir='ckpt', checkpoint_epochs=5 ) ) # Learning rate schedules if warmup_epochs: callbacks.append( lbann.CallbackLinearGrowthLearningRate( target=learning_rate * mini_batch_size / 128, num_epochs=warmup_epochs ) ) if learning_rate_drop_factor: callbacks.append( lbann.CallbackDropFixedLearningRate( drop_epoch=list(range(0, num_epochs, learning_rate_drop_interval)), amt=learning_rate_drop_factor) ) # Construct model model = lbann.Model(num_epochs, layers=lbann.traverse_layer_graph(input), objective_function=obj, metrics=metrics, callbacks=callbacks) # Setup optimizer # opt = lbann.Adam(learn_rate=learning_rate, beta1=0.9, beta2=0.999, eps=1e-8) opt = lbann.SGD(learn_rate=learning_rate, momentum=0.9) # Load data reader from prototext data_reader_proto = lbann.lbann_pb2.LbannPB() with open(data_reader_file, 'r') as f: google.protobuf.text_format.Merge(f.read(), data_reader_proto) data_reader_proto = data_reader_proto.data_reader for reader_proto in data_reader_proto.reader: reader_proto.python.module_dir = os.path.dirname(os.path.realpath(__file__)) # Return experiment objects return model, data_reader_proto, opt
def make_model(NUM_NODES, NUM_EDGES, NUM_NODES_FEATURES, NUM_EDGE_FEATURES, EMBEDDING_DIM, EDGE_EMBEDDING_DIM, NUM_OUT_FEATURES, NUM_EPOCHS, NUM_GROUPS=0): """ Creates an LBANN model for the OGB-LSC PPQM4M Dataset Args: NUM_NODES (int): The number of nodes in the largest graph in the dataset (51 for LSC-PPQM4M) NUM_EDGES (int): The number of edges in the largest graph in the dataset (118 for LSC-PPQM4M) NUM_NODES_FEATURES (int): The dimensionality of the input node features vector (9 for LSC-PPQM4M) NUM_EDGE_FEATURES (int): The dimensionality of the input edge feature vectors (3 for LSC-PPQM4M) EMBEDDING_DIM (int): The embedding dimensionality of the node feature vector EDGE_EMBEDDING_DIM (int): The embedding dimensionality of the edge feature vector NUM_OUT_FEATURES (int): The dimensionality of the node feature vectors after graph convolutions NUM_EPOCHS (int): The number of epochs to train the network NUM_GROUPS (int): The number of channel groups for distconv channelwise fully connected layer (default : 0) Returns: (Model): lbann model object """ in_channel = EMBEDDING_DIM out_channel = NUM_OUT_FEATURES output_dimension = 1 _input = lbann.Input(data_field='samples') node_feature_mat, neighbor_feature_mat, edge_feature_mat, edge_indices, target = \ graph_data_splitter(_input, NUM_NODES, NUM_EDGES, NUM_NODES_FEATURES, NUM_EDGE_FEATURES, EMBEDDING_DIM, EDGE_EMBEDDING_DIM) x = NNConvLayer(node_feature_mat, neighbor_feature_mat, edge_feature_mat, edge_indices, in_channel, out_channel, EDGE_EMBEDDING_DIM, NUM_NODES, NUM_EDGES, NUM_GROUPS) for i, num_neurons in enumerate([256, 128, 32, 8], 1): x = lbann.FullyConnected(x, num_neurons=num_neurons, name="hidden_layer_{}".format(i)) x = lbann.Relu(x, name='hidden_layer_{}_activation'.format(i)) x = lbann.FullyConnected(x, num_neurons=output_dimension, name="output") loss = lbann.MeanAbsoluteError(x, target) layers = lbann.traverse_layer_graph(_input) training_output = lbann.CallbackPrint(interval=1, print_global_stat_only=False) gpu_usage = lbann.CallbackGPUMemoryUsage() timer = lbann.CallbackTimer() callbacks = [training_output, gpu_usage, timer] model = lbann.Model(NUM_EPOCHS, layers=layers, objective_function=loss, callbacks=callbacks) return model
def make_model(num_vertices=None, node_features=None, num_classes=None, dataset=None, kernel_type='GCN', callbacks=None, num_epochs=1): '''Construct a model DAG using one of the Graph Kernels Args: num_vertices (int): Number of vertices of each graph (default: None) node_features (int): Number of features per noded (default: None) num_classes (int): Number of classes as targets (default: None) dataset (str): Preset data set to use. Either a datset parameter has to be supplied or all of num_vertices, node_features, and num_classes have to be supplied. (default: None) kernel_type (str): Graph Kernel to use in model. Expected one of GCN, GIN, Graph, or GatedGraph (deafult: GCN) callbacks (list): Callbacks for the model. If set to None the model description, GPU usage, training_output, and timer is reported. (default: None) num_epochs (int): Number of epochs to run (default: 1) Returns: (lbann Model Object: A model object with the supplied callbacks, dataset presets, and graph kernels. ''' assert num_vertices != dataset #Ensure atleast one of the values is set if dataset is not None: assert num_vertices is None if dataset == 'MNIST': num_vertices = 75 num_classes = 10 node_features = 1 elif dataset == 'PROTEINS': num_vertices = 100 num_classes = 2 node_features = 3 else: raise Exception("Unkown Dataset") assert num_vertices is not None assert num_classes is not None assert node_features is not None #---------------------------------- # Reshape and Slice Input Tensor #---------------------------------- input_ = lbann.Input(target_mode='classification') # Input dimensions should be (num_vertices * node_features + num_vertices^2 + num_classes ) # Input should have atleast two children since the target is classification data = lbann_Graph_Data(input_, num_vertices, node_features, num_classes) feature_matrix = data.x adj_matrix = data.adj target = data.y #---------------------------------- # Perform Graph Convolution #---------------------------------- if kernel_type == 'GIN': x = GINConvLayer(feature_matrix, adj_matrix) elif kernel_type == 'GCN': x = GCNConvLayer(feature_matrix, adj_matrix) elif kernel_type == 'Graph': x = GraphConvLayer(feature_matrix, adj_matrix) elif kernel_type == 'GatedGraph': x = GATConvLayer(feature_matrix, adj_matrix) else: ValueError( 'Invalid Graph kernel specifier "{}" recieved. Expected one of:\ GIN,GCN,Graph or GatedGraph'.format(kernel_type)) out_channel = x.shape[1] #---------------------------------- # Apply Reduction on Node Features #---------------------------------- average_vector = lbann.Constant(value=1 / num_vertices, num_neurons=str_list([1, num_vertices]), name="Average_Vector") x = x.get_mat(out_channel) x = lbann.MatMul(average_vector, x, name="Node_Feature_Reduction") # X is now a vector with output_channel dimensions x = lbann.Reshape(x, dims=str_list([out_channel]), name="Squeeze") x = lbann.FullyConnected(x, num_neurons=64, name="hidden_layer_1") x = lbann.Relu(x, name="hidden_layer_1_activation") x = lbann.FullyConnected(x, num_neurons=num_classes, name="Output_Fully_Connected") #---------------------------------- # Loss Function and Accuracy s #---------------------------------- probs = lbann.Softmax(x, name="Softmax") loss = lbann.CrossEntropy(probs, target, name="Cross_Entropy_Loss") accuracy = lbann.CategoricalAccuracy(probs, target, name="Accuracy") layers = lbann.traverse_layer_graph(input_) if callbacks is None: print_model = lbann.CallbackPrintModelDescription( ) #Prints initial Model after Setup training_output = lbann.CallbackPrint( interval=1, print_global_stat_only=False) #Prints training progress gpu_usage = lbann.CallbackGPUMemoryUsage() timer = lbann.CallbackTimer() callbacks = [print_model, training_output, gpu_usage, timer] else: if isinstance(callbacks, list): callbacks = callbacks metrics = [lbann.Metric(accuracy, name='accuracy', unit="%")] model = lbann.Model(num_epochs, layers=layers, objective_function=loss, metrics=metrics, callbacks=callbacks) return model
num_tracked_images=10) summarize_images = lbann.CallbackSummarizeImages( selection_strategy=img_strategy, image_source_layer_name=reconstruction.name, epoch_interval=1) # Dump original image from input layer one time summarize_input_layer = lbann.CallbackSummarizeImages( selection_strategy=img_strategy, image_source_layer_name=input_.name, epoch_interval=10000) callbacks = [ lbann.CallbackPrint(), lbann.CallbackTimer(), summarize_input_layer, summarize_images ] layer_list = list(lbann.traverse_layer_graph(input_)) model = lbann.Model(args.num_epochs, layers=layer_list, objective_function=obj, metrics=metrics, callbacks=callbacks, summary_dir=".") # Setup optimizer opt = lbann.contrib.args.create_optimizer(args) # Setup data reader num_classes = min(args.num_classes, num_labels)
def construct_model(run_args): """Construct LBANN model. Initial model for ATOM molecular VAE """ import lbann print("Dump model dir ", run_args.dump_model_dir) assert run_args.dump_model_dir, "evaluate script asssumes a pretrained WAE model" pad_index = run_args.pad_index assert pad_index is not None sequence_length = run_args.sequence_length assert sequence_length is not None print("sequence length is {}".format(sequence_length)) data_layout = "data_parallel" # Layer graph input_ = lbann.Identity(lbann.Input(name='inp', data_field='samples'), name='inp1') wae_loss = [] input_feature_dims = sequence_length embedding_size = run_args.embedding_dim dictionary_size = run_args.num_embeddings assert embedding_size is not None assert dictionary_size is not None save_output = True if run_args.dump_outputs_dir else False print("save output? ", save_output, "out dir ", run_args.dump_outputs_dir) z = lbann.Gaussian(mean=0.0, stdev=1.0, neuron_dims=run_args.z_dim) waemodel = molwae.MolWAE(input_feature_dims, dictionary_size, embedding_size, pad_index, run_args.z_dim, save_output) recon, d1_real, d1_fake, d_adv, arg_max = waemodel(input_, z) zero = lbann.Constant(value=0.0, num_neurons='1', name='zero') one = lbann.Constant(value=1.0, num_neurons='1', name='one') d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real, one], name='d1_real_bce') d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake, zero], name='d1_fake_bce') d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv, one], name='d_adv_bce') wae_loss.append(recon) layers = list(lbann.traverse_layer_graph(input_)) # Setup objective function weights = set() src_layers = [] dst_layers = [] for l in layers: if (l.weights and "disc0" in l.name and "instance1" in l.name): src_layers.append(l.name) #freeze weights in disc2 if (l.weights and "disc1" in l.name): dst_layers.append(l.name) for idx in range(len(l.weights)): l.weights[idx].optimizer = lbann.NoOptimizer() weights.update(l.weights) l2_weights = [ w for w in weights if not isinstance(w.optimizer, lbann.NoOptimizer) ] l2_reg = lbann.L2WeightRegularization(weights=l2_weights, scale=1e-4) wae_loss.append(d1_real_bce) wae_loss.append(d_adv_bce) wae_loss.append(d1_fake_bce) wae_loss.append(l2_reg) print("LEN wae loss ", len(wae_loss)) obj = lbann.ObjectiveFunction(wae_loss) # Initialize check metric callback metrics = [ lbann.Metric(d_adv_bce, name='adv_loss'), lbann.Metric(recon, name='recon') ] callbacks = [ lbann.CallbackPrint(), #lbann.CallbackStepLearningRate(step=10, amt=0.5), lbann.CallbackTimer() ] callbacks.append( lbann.CallbackReplaceWeights(source_layers=list2str(src_layers), destination_layers=list2str(dst_layers), batch_interval=2)) #Dump output (activation) for post processing if (run_args.dump_outputs_dir): pred_tensor = lbann.Concatenation(arg_max, name='pred_tensor') callbacks.append( lbann.CallbackDumpOutputs( batch_interval=run_args.dump_outputs_interval, execution_modes='test', directory=run_args.dump_outputs_dir, layers=f'inp pred_tensor {waemodel.q_mu.name}')) # Construct model return lbann.Model(run_args.num_epochs, weights=weights, layers=layers, objective_function=obj, metrics=metrics, callbacks=callbacks)
def construct_jag_wae_model(ydim, zdim, mcf, useCNN, dump_models, ltfb_batch_interval, num_epochs): """Construct LBANN model. JAG Wasserstein autoencoder model """ # Layer graph input = lbann.Input(data_field='samples', name='inp_data') # data is 64*64*4 images + 15 scalar + 5 param #inp_slice = lbann.Slice(input, axis=0, slice_points="0 16399 16404",name='inp_slice') inp_slice = lbann.Slice(input, axis=0, slice_points=str_list([0, ydim, ydim + 5]), name='inp_slice') gt_y = lbann.Identity(inp_slice, name='gt_y') gt_x = lbann.Identity(inp_slice, name='gt_x') #param not used zero = lbann.Constant(value=0.0, num_neurons='1', name='zero') one = lbann.Constant(value=1.0, num_neurons='1', name='one') z_dim = 20 #Latent space dim z = lbann.Gaussian(mean=0.0, stdev=1.0, neuron_dims="20") model = macc_network_architectures.MACCWAE(zdim, ydim, cf=mcf, use_CNN=useCNN) d1_real, d1_fake, d_adv, pred_y = model(z, gt_y) d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real, one], name='d1_real_bce') d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake, zero], name='d1_fake_bce') d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv, one], name='d_adv_bce') img_loss = lbann.MeanSquaredError([pred_y, gt_y]) rec_error = lbann.L2Norm2( lbann.WeightedSum([pred_y, gt_y], scaling_factors="1 -1")) layers = list(lbann.traverse_layer_graph(input)) # Setup objective function weights = set() src_layers = [] dst_layers = [] for l in layers: if (l.weights and "disc0" in l.name and "instance1" in l.name): src_layers.append(l.name) #freeze weights in disc2 if (l.weights and "disc1" in l.name): dst_layers.append(l.name) for idx in range(len(l.weights)): l.weights[idx].optimizer = lbann.NoOptimizer() weights.update(l.weights) l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4) d_adv_bce = lbann.LayerTerm(d_adv_bce, scale=0.01) obj = lbann.ObjectiveFunction( [d1_real_bce, d1_fake_bce, d_adv_bce, img_loss, rec_error, l2_reg]) # Initialize check metric callback metrics = [lbann.Metric(img_loss, name='recon_error')] #pred_y = macc_models.MACCWAE.pred_y_name callbacks = [ lbann.CallbackPrint(), lbann.CallbackTimer(), lbann.CallbackPrintModelDescription(), lbann.CallbackSaveModel(dir=dump_models), lbann.CallbackReplaceWeights(source_layers=list2str(src_layers), destination_layers=list2str(dst_layers), batch_interval=2) ] if (ltfb_batch_interval > 0): callbacks.append( lbann.CallbackLTFB(batch_interval=ltfb_batch_interval, metric='recon_error', low_score_wins=True, exchange_hyperparameters=True)) # Construct model return lbann.Model(num_epochs, weights=weights, layers=layers, metrics=metrics, objective_function=obj, callbacks=callbacks)
pearson_r_var2 = lbann.Variance(pred, name="pearson_r_var2") pearson_r_mult = lbann.Multiply([pearson_r_var1, pearson_r_var2], name="pearson_r_mult") pearson_r_sqrt = lbann.Sqrt(pearson_r_mult, name="pearson_r_sqrt") eps = lbann.Constant(value=1e-07, hint_layer=pearson_r_sqrt) pearson_r = lbann.Divide( [pearson_r_cov, lbann.Add(pearson_r_sqrt, eps)], name="pearson_r") metrics = [lbann.Metric(mse, name='mse')] metrics.append(lbann.Metric(pearson_r, name='pearson_r')) callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()] layers = list(lbann.traverse_layer_graph([images, responses])) model = lbann.Model(args.num_epochs, layers=layers, metrics=metrics, objective_function=mse, callbacks=callbacks) # Load data reader from prototext data_reader_proto = lbann.lbann_pb2.LbannPB() with open(data_reader_prototext, 'r') as f: txtf.Merge(f.read(), data_reader_proto) data_reader_proto = data_reader_proto.data_reader # Setup trainer
def construct_macc_surrogate_model(xdim, ydim, zdim, wae_mcf, surrogate_mcf, lambda_cyc, useCNN, dump_models, pretrained_dir, ltfb_batch_interval, num_epochs): """Construct MACC surrogate model. See https://arxiv.org/pdf/1912.08113.pdf model architecture and other details """ # Layer graph input = lbann.Input(data_field='samples', name='inp_data') # data is 64*64*4 images + 15 scalar + 5 param inp_slice = lbann.Slice(input, axis=0, slice_points=str_list([0, ydim, ydim + xdim]), name='inp_slice') gt_y = lbann.Identity(inp_slice, name='gt_y') gt_x = lbann.Identity(inp_slice, name='gt_x') #param not used zero = lbann.Constant(value=0.0, num_neurons='1', name='zero') one = lbann.Constant(value=1.0, num_neurons='1', name='one') z = lbann.Gaussian(mean=0.0, stdev=1.0, neuron_dims="20") wae = macc_network_architectures.MACCWAE( zdim, ydim, cf=wae_mcf, use_CNN=useCNN) #pretrained, freeze inv = macc_network_architectures.MACCInverse(xdim, cf=surrogate_mcf) fwd = macc_network_architectures.MACCForward(zdim, cf=surrogate_mcf) y_pred_fwd = wae.encoder(gt_y) param_pred_ = wae.encoder(gt_y) input_fake = inv(param_pred_) output_cyc = fwd(input_fake) y_image_re2 = wae.decoder(output_cyc) '''**** Train cycleGAN input params <--> latent space of (images, scalars) ****''' output_fake = fwd(gt_x) y_image_re = wae.decoder(output_fake) param_pred2_ = wae.encoder(y_image_re) input_cyc = inv(param_pred2_) L_l2_x = lbann.MeanSquaredError(input_fake, gt_x) L_cyc_x = lbann.MeanSquaredError(input_cyc, gt_x) L_l2_y = lbann.MeanSquaredError(output_fake, y_pred_fwd) L_cyc_y = lbann.MeanSquaredError(output_cyc, y_pred_fwd) #@todo slice here to separate scalar from image img_sca_loss = lbann.MeanSquaredError(y_image_re, gt_y) #L_cyc = L_cyc_y + L_cyc_x L_cyc = lbann.Add(L_cyc_y, L_cyc_x) #loss_gen0 = L_l2_y + lamda_cyc*L_cyc loss_gen0 = lbann.WeightedSum([L_l2_y, L_cyc], scaling_factors=f'1 {lambda_cyc}') loss_gen1 = lbann.WeightedSum([L_l2_x, L_cyc_y], scaling_factors=f'1 {lambda_cyc}') #loss_gen1 = L_l2_x + lamda_cyc*L_cyc_y layers = list(lbann.traverse_layer_graph(input)) weights = set() #Freeze appropriate (pretrained) weights pretrained_models = ["wae"] #add macc? for l in layers: for idx in range(len(pretrained_models)): if (l.weights and pretrained_models[idx] in l.name): for w in range(len(l.weights)): l.weights[w].optimizer = lbann.NoOptimizer() weights.update(l.weights) l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4) #d_adv_bce = lbann.LayerTerm(d_adv_bce,scale=0.01) # Setup objective function obj = lbann.ObjectiveFunction([loss_gen0, loss_gen1, l2_reg]) # Initialize check metric callback metrics = [ lbann.Metric(img_sca_loss, name='fw_loss'), lbann.Metric(L_l2_x, name='inverse loss'), lbann.Metric(L_cyc_y, name='output cycle loss'), lbann.Metric(L_cyc_x, name='param cycle loss') ] callbacks = [ lbann.CallbackPrint(), lbann.CallbackSaveModel(dir=dump_models), lbann.CallbackLoadModel(dirs=str(pretrained_dir)), lbann.CallbackTimer() ] if (ltfb_batch_interval > 0): callbacks.append( lbann.CallbackLTFB(batch_interval=ltfb_batch_interval, metric='fw_loss', low_score_wins=True, exchange_hyperparameters=True)) # Construct model return lbann.Model(num_epochs, weights=weights, layers=layers, metrics=metrics, objective_function=obj, callbacks=callbacks)
# Run LBANN # ---------------------------------- # Create optimizer opt = lbann.SGD(learn_rate=args.learning_rate) # Create LBANN objects iterations_per_epoch = utils.ceildiv(epoch_size, args.mini_batch_size) num_epochs = utils.ceildiv(args.num_iterations, iterations_per_epoch) trainer = lbann.Trainer( mini_batch_size=args.mini_batch_size, num_parallel_readers=0, ) callbacks = [ lbann.CallbackPrint(), lbann.CallbackTimer(), lbann.CallbackDumpWeights( directory='embeddings', epoch_interval=num_epochs, format='distributed_binary', ), ] model = lbann.Model( num_epochs, layers=lbann.traverse_layer_graph(input_), objective_function=obj, metrics=metrics, callbacks=callbacks, ) # Create batch script
def construct_model(args): """Construct LBANN for CosmoGAN 3D model. """ obj = [] metrics = [] callbacks = [] w = [args.input_width]*3 w.insert(0,args.input_channel) _sample_dims = w ps = None #have model and input ps if(args.use_distconv): ps = get_parallel_strategy_args( sample_groups=args.mini_batch_size, depth_groups=args.depth_groups, height_groups=args.height_groups, ) g_device = 'GPU' input_ = lbann.Input(name='input', data_field='samples') input_ = lbann.Reshape(input_, dims=list2str(_sample_dims),name='in_reshape', device=g_device), x1 = lbann.Identity(input_, parallel_strategy=None, name='x1') x2 = lbann.Identity(input_, name='x2') if args.compute_mse else None zero = lbann.Constant(value=0.0,num_neurons='1',name='zero',device=g_device) one = lbann.Constant(value=1.0,num_neurons='1',name='one', device=g_device) z = lbann.Reshape(lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims="64", name='noise_vec', device=g_device), dims='1 64', name='noise_vec_reshape',device=g_device) print("RUN ARGS ", args) d1_real,d1_fake,d_adv, gen_img = model.Exa3DGAN(args.input_width,args.input_channel, g_device,ps,use_bn=args.use_bn)(x1,z) layers=list(lbann.traverse_layer_graph([d1_real, d1_fake])) # Setup objective function weights = set() src_layers = [] dst_layers = [] for l in layers: if(l.weights and "disc1" in l.name and "instance1" in l.name): src_layers.append(l.name) #freeze weights in disc2, analogous to discrim.trainable=False in Keras if(l.weights and "disc2" in l.name): dst_layers.append(l.name) for idx in range(len(l.weights)): l.weights[idx].optimizer = lbann.NoOptimizer() weights.update(l.weights) d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real,one],name='d1_real_bce') d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake,zero],name='d1_fake_bce') d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv,one],name='d_adv_bce') mse = lbann.MeanSquaredError([gen_img, x2], name='MSE') if args.compute_mse else None obj.append(d1_real_bce) obj.append(d1_fake_bce) obj.append(d_adv_bce) metrics.append(lbann.Metric(d_adv_bce, name='d_adv_bce')) metrics.append(lbann.Metric(d1_real_bce, name='d1_real_bce')) metrics.append(lbann.Metric(d1_fake_bce, name='d1_fake_bce')) if (mse is not None): obj.append(mse) metrics.append(lbann.Metric(mse, name='MSE')) callbacks.append(lbann.CallbackPrint()) callbacks.append(lbann.CallbackTimer()) callbacks.append(lbann.CallbackGPUMemoryUsage()) # ------------------------------------------ # Construct model # ------------------------------------------ return lbann.Model(args.num_epochs, weights=weights, layers=layers, objective_function=obj, metrics=metrics, callbacks=callbacks)