fldr_name = now.strftime('%Y%m%d_%H%M%S')  # timestamp used in the results folder name
data_pct, val_ratio = 1.0, 0.1  # fraction of data to use, fraction held out for validation
batchsize = args.batchsize
step_interval = args.step_interval
print('Step interval', step_interval)
work_dir = "/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/512square/{0}_bsize{1}_{2}".format(
    fldr_name, batchsize, args.suffix)

#####################
### Run lbann
trainer = lbann.Trainer(
    mini_batch_size=batchsize,
    random_seed=random_seed,
    callbacks=lbann.CallbackCheckpoint(checkpoint_dir='chkpt',
                                       checkpoint_epochs=10))
                                       # checkpoint_steps=step_interval))
spectral_loss = args.spec_loss
print("Spectral loss: ", spectral_loss)
model = construct_model(num_epochs, mcr, spectral_loss=spectral_loss,
                        save_batch_interval=int(step_interval))
# 'step_interval*val_ratio' is the step interval for the validation set.

# Setup optimizer
opt = lbann.Adam(learn_rate=args.learn_rate, beta1=0.5, beta2=0.99, eps=1e-8)

# Construct data reader
data_reader = construct_data_reader(data_pct, val_ratio)

status = lbann.run(trainer, model, data_reader, opt,
                   nodes=num_nodes,
                   procs_per_node=num_procs,
                   work_dir=work_dir,
                   scheduler='slurm',
                   time_limit=1440,
                   setup_only=False)
print(status)
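# `construct_data_reader` is defined elsewhere in this script. A minimal
# sketch of such a helper, assuming LBANN's Python data-reader protobuf
# interface and a hypothetical `dataset` module exposing get_sample /
# num_samples / sample_dims (module and function names are illustrative,
# and proto field names vary across LBANN versions, e.g.
# validation_percent vs validation_fraction):
def construct_data_reader(data_pct, val_ratio):
    import lbann
    message = lbann.reader_pb2.DataReader()
    reader = message.reader.add()
    reader.name = 'python'
    reader.role = 'train'
    reader.shuffle = True
    reader.percent_of_data_to_use = data_pct
    reader.validation_percent = val_ratio
    reader.python.module = 'dataset'
    reader.python.module_dir = '.'
    reader.python.sample_function = 'get_sample'
    reader.python.num_samples_function = 'num_samples'
    reader.python.sample_dims_function = 'sample_dims'
    return message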
if __name__ == '__main__':
    import lbann
    from google.protobuf import text_format as txtf

    y_dim = 16399  # image + scalar shape
    z_dim = 20     # latent space dim
    num_epochs = 100
    mini_batch_size = 128

    trainer = lbann.Trainer(mini_batch_size=mini_batch_size, serialize_io=True)
    model = jag_models.construct_jag_wae_model(y_dim=y_dim,
                                               z_dim=z_dim,
                                               num_epochs=num_epochs)

    # Setup optimizer
    opt = lbann.Adam(learn_rate=0.0001, beta1=0.9, beta2=0.99, eps=1e-8)

    # Load data reader from prototext
    data_reader_proto = lbann.lbann_pb2.LbannPB()
    with open(data_reader_prototext, 'r') as f:
        txtf.Merge(f.read(), data_reader_proto)
    data_reader_proto = data_reader_proto.data_reader

    status = lbann.run(trainer, model, data_reader_proto, opt,
                       scheduler='slurm',
                       nodes=1,
                       procs_per_node=1,
                       time_limit=360,
                       job_name='jag_wae')
    print(status)
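# The prototext-loading idiom above recurs in several of these scripts; a
# small helper (a sketch that only refactors the calls already shown)
# factors it out:
def load_data_reader(prototext_path):
    """Parse an LBANN data-reader prototext file into its protobuf message."""
    import lbann
    from google.protobuf import text_format as txtf
    pb = lbann.lbann_pb2.LbannPB()
    with open(prototext_path, 'r') as f:
        txtf.Merge(f.read(), pb)
    return pb.data_reader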
    lbann.Reshape(lbann_scales, dims=str_list([-1, 1])))

# Construct LBANN model with metric checking and gradient checking
metric = lbann.Metric(lbann_z, name='metric')
callbacks = [
    lbann.CallbackCheckMetric(
        metric=metric.name,
        lower_bound=np_z - tol,
        upper_bound=np_z + tol,
        error_on_failure=True,
        execution_modes='test',
    ),
    lbann.CallbackCheckGradients(error_on_failure=True),
]
model = lbann.Model(
    epochs=0,
    layers=lbann.traverse_layer_graph([input_, lbann_x]),
    objective_function=lbann_z,
    metrics=metric,
    callbacks=callbacks,
)

# Run LBANN
lbann.run(
    trainer=lbann.Trainer(mini_batch_size=1),
    model=model,
    data_reader=reader,
    optimizer=lbann.SGD(),
    job_name='lbann_fftshift_test',
)
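# For context, `np_z` and `tol` above come from a NumPy reference
# computation done outside LBANN. A self-contained sketch of that pattern
# (the values and the scale factor are illustrative, not the test's
# actual ones):
import numpy as np
np_x = np.random.uniform(size=16).astype(np.float32)
np_z = float(np.sum(np_x ** 2))  # reference value the LBANN metric must match
tol = 8 * abs(np_z) * np.finfo(np.float32).eps  # tolerance scaled to fp32 precision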
x = lbann.WeightsLayer(
    weights=lbann.Weights(
        lbann.ValueInitializer(values=str_list(image.flatten())),
    ),
    dims=str_list(image.shape),
)
max_r = image.shape[-1] // 2
rprof = RadialProfile()(x, image.shape, max_r)
rprof_slice = lbann.Slice(rprof, slice_points=str_list([0, 1, 2, 3]))
red = lbann.Identity(rprof_slice, name='red')
green = lbann.Identity(rprof_slice, name='green')
blue = lbann.Identity(rprof_slice, name='blue')

# Construct model
callbacks = [
    lbann.CallbackDumpOutputs(layers=str_list(['red', 'green', 'blue'])),
]
model = lbann.Model(
    epochs=0,
    layers=lbann.traverse_layer_graph([input_, rprof]),
    callbacks=callbacks,
)

# Run LBANN
lbann.run(
    trainer=lbann.Trainer(mini_batch_size=1),
    model=model,
    data_reader=reader,
    optimizer=lbann.NoOptimizer(),
    job_name='lbann_radial_profile_test',
)
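# For reference, a radial profile is the mean of pixel values in annular
# bins around the image center, one profile per channel. A NumPy sketch
# (not the RadialProfile module's actual implementation):
import numpy as np

def radial_profile_np(image, max_r):
    """Mean of `image` (channels, height, width) over integer-radius bins."""
    _, h, w = image.shape
    ys, xs = np.indices((h, w))
    r = np.sqrt((ys - h // 2) ** 2 + (xs - w // 2) ** 2).astype(int)
    return np.stack([
        np.array([chan[r == k].mean() for k in range(max_r)])
        for chan in image
    ])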
model = construct_model(num_epochs, mcr, save_batch_interval=save_interval)

# Setup optimizer
opt = lbann.Adam(learn_rate=0.0002, beta1=0.5, beta2=0.99, eps=1e-8)

# Construct data reader
data_reader = construct_data_reader(data_pct, val_ratio)

### Initialize LBANN inference executable
from os.path import abspath, dirname, join
lbann_exe = abspath(lbann.lbann_exe())
lbann_exe = join(dirname(lbann_exe), 'lbann_inf')
print('Loading model from:', args.pretrained_dir)

status = lbann.run(
    trainer, model, data_reader, opt,
    lbann_exe=lbann_exe,
    nodes=num_nodes,
    procs_per_node=num_procs,
    work_dir=work_dir,
    scheduler='slurm',
    time_limit=1440,
    setup_only=False,
    # job_name='gen_imgs_',
    lbann_args=[
        f'--load_model_weights_dir={args.pretrained_dir}',
        '--load_model_weights_dir_is_complete',
    ])
print(status)
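# Since `lbann_inf` above is resolved relative to the training
# executable's directory, a quick existence check before calling
# lbann.run avoids an opaque scheduler failure (a sketch, not in the
# original script):
import os
if not os.path.isfile(lbann_exe):
    raise FileNotFoundError(f'LBANN inference executable not found: {lbann_exe}')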
model = lbann.Model(mini_batch_size,
                    num_epochs,
                    layers=lbann.traverse_layer_graph(input),
                    objective_function=loss,
                    metrics=[lbann.Metric(acc, name='accuracy', unit='%')],
                    callbacks=[lbann.CallbackPrint(), lbann.CallbackTimer()])

# Setup optimizer
opt = lbann.SGD(learn_rate=0.01, momentum=0.9)

# Load data reader from prototext
model_zoo_dir = os.path.dirname(os.path.dirname(__file__))
data_reader_file = os.path.join(model_zoo_dir,
                                'data_readers',
                                'data_reader_mnist.prototext')
data_reader_proto = lbann.lbann_pb2.LbannPB()
with open(data_reader_file, 'r') as f:
    txtf.Merge(f.read(), data_reader_proto)
data_reader_proto = data_reader_proto.data_reader

# ----------------------------------
# Run experiment
# ----------------------------------
# Note: Use `lbann.contrib.lc.launcher.run` instead for optimized
# defaults on LC systems.

kwargs = {}
if args.partition:
    kwargs['partition'] = args.partition
if args.account:
    kwargs['account'] = args.account
lbann.run(model, data_reader_proto, opt,
          job_name='lbann_lenet',
          **kwargs)
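# The `args.partition` / `args.account` flags above presuppose an argparse
# setup along these lines (a sketch; the real script may define them
# differently or via lbann.contrib helpers):
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--partition', action='store', type=str,
                    help='scheduler partition to submit to')
parser.add_argument('--account', action='store', type=str,
                    help='scheduler account to charge')
args = parser.parse_args()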
# # Load data reader from prototext
# data_reader_proto = lbann.lbann_pb2.LbannPB()
# with open(data_reader_prototext, 'r') as f:
#     txtf.Merge(f.read(), data_reader_proto)
# data_reader_proto = data_reader_proto.data_reader
# kwargs = lbann.contrib.args.get_scheduler_kwargs(args)

status = lbann.run(
    trainer, model, data_reader, opt,
    lbann_exe=lbann_exe,
    scheduler='slurm',
    nodes=1,
    procs_per_node=1,
    time_limit=30,
    setup_only=False,
    batch_job=False,
    job_name='gen_images',
    lbann_args=[
        '--preload_data_store --use_data_store --load_model_weights_dir_is_complete',
        # f'--metadata={metadata_prototext}',
        f'--load_model_weights_dir={args.pretrained_dir}',
        # f'--index_list_test={args.index_list_test}',
        # f'--data_filedir_test={args.data_filedir_test}'
    ]
    # , **kwargs
)
# print(status)
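# To re-enable the commented-out scheduler kwargs above, the helper would
# be wired in roughly like this (a sketch; `args` must come from a parser
# prepared with lbann.contrib.args.add_scheduler_arguments, whose exact
# signature varies across LBANN versions):
import argparse
import lbann.contrib.args
parser = argparse.ArgumentParser()
lbann.contrib.args.add_scheduler_arguments(parser)
args = parser.parse_args()
kwargs = lbann.contrib.args.get_scheduler_kwargs(args)
# ...then pass through: lbann.run(..., **kwargs)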
    objective_function=mse,
    callbacks=callbacks)

if __name__ == '__main__':
    import lbann
    from google.protobuf import text_format as txtf

    mini_batch_size = 256
    trainer = lbann.Trainer(mini_batch_size=mini_batch_size)
    model = construct_model()

    # Setup optimizer
    opt = lbann.Adam(learn_rate=0.0001, beta1=0.9, beta2=0.99, eps=1e-8)

    # Load data reader from prototext
    data_reader_proto = lbann.lbann_pb2.LbannPB()
    with open(data_reader_prototext, 'r') as f:
        txtf.Merge(f.read(), data_reader_proto)
    data_reader_proto = data_reader_proto.data_reader

    status = lbann.run(
        trainer, model, data_reader_proto, opt,
        scheduler='lsf',
        nodes=2,
        procs_per_node=4,
        time_limit=360,
        # setup_only=True,
        job_name='p1_combo')
    print(status)