def setup_embeddings(script, config):

    # Get parameters
    num_vertices = config.getint('Graph', 'num_vertices')
    motif_size = config.getint('Motifs', 'motif_size')
    walk_length = config.getint('Walks', 'walk_length')
    embeddings_dir = config.get('Embeddings', 'embeddings_dir')
    embed_dim = config.getint('Embeddings', 'embed_dim')
    learn_rate = config.getfloat('Embeddings', 'learn_rate')
    mini_batch_size = config.getint('Embeddings', 'mini_batch_size')
    sgd_steps = config.getint('Embeddings', 'sgd_steps')
    sgd_steps_per_epoch = config.getint('Embeddings', 'sgd_steps_per_epoch')
    assert (num_vertices > 0
            and motif_size > 0
            and walk_length >= motif_size
            and embeddings_dir
            and embed_dim > 0
            and mini_batch_size > 0
            and sgd_steps >= 0
            and sgd_steps_per_epoch > 0), \
        'invalid configuration for training embeddings'

    # Construct LBANN objects
    num_epochs = (sgd_steps + sgd_steps_per_epoch - 1) // sgd_steps_per_epoch
    trainer = lbann.Trainer(
        mini_batch_size=mini_batch_size,
        num_parallel_readers=0,
    )
    model_ = make_model(
        motif_size,
        walk_length,
        num_vertices,
        embed_dim,
        learn_rate,
        num_epochs,
        embeddings_dir,
    )
    optimizer = lbann.SGD(learn_rate=learn_rate)
    data_reader = make_data_reader()

    # Add LBANN invocation to batch script
    prototext_file = os.path.join(script.work_dir, 'experiment.prototext')
    lbann.proto.save_prototext(
        prototext_file,
        trainer=trainer,
        model=model_,
        data_reader=data_reader,
        optimizer=optimizer,
    )
    script.add_body_line('')
    script.add_body_line('# Train embeddings')
    script.add_parallel_command([
        lbann.lbann_exe(),
        f'--prototext={prototext_file}',
        '--num_io_threads=1',
    ])
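# A minimal usage sketch, assuming an INI-style config file and LBANN's
# batch-script helper; the file name and work_dir below are hypothetical:
#
#   import configparser
#   import lbann.contrib.launcher
#
#   config = configparser.ConfigParser()
#   config.read('experiment.ini')
#   script = lbann.contrib.launcher.make_batch_script(work_dir='motif_run')
#   setup_embeddings(script, config)
#   script.run()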
obj.append(lbann.WeightedSum(negative_loss, scaling_factors='2'))
metrics.append(lbann.Metric(positive_loss, name='positive loss'))
metrics.append(lbann.Metric(negative_loss, name='negative loss'))

# Perform computation at double precision
for l in lbann.traverse_layer_graph(input_):
    l.datatype = lbann.DataType.DOUBLE
    for w in l.weights:
        w.datatype = lbann.DataType.DOUBLE

# ----------------------------------
# Run LBANN
# ----------------------------------

# Create optimizer
opt = lbann.SGD(learn_rate=args.learning_rate)

# Create LBANN objects
iterations_per_epoch = utils.ceildiv(epoch_size, args.mini_batch_size)
num_epochs = utils.ceildiv(args.num_iterations, iterations_per_epoch)
trainer = lbann.Trainer(
    mini_batch_size=args.mini_batch_size,
    num_parallel_readers=0,
)
callbacks = [
    lbann.CallbackPrint(),
    lbann.CallbackTimer(),
    lbann.CallbackDumpWeights(
        directory='embeddings',
        epoch_interval=num_epochs,
        format='distributed_binary',
    ),
]
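# For reference, utils.ceildiv above presumably implements ceiling
# division, so the requested iteration count is always covered:
#
#   def ceildiv(a, b):
#       """Integer ceiling of a/b."""
#       return (a + b - 1) // b
#
# e.g. ceildiv(10, 4) == 3, so 10 iterations at 4 per epoch take 3 epochs.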
def setup(data_reader_file,
          name='classifier',
          num_labels=200,
          mini_batch_size=128,
          num_epochs=1000,
          learning_rate=0.1,
          bn_statistics_group_size=2,
          fc_data_layout='model_parallel',
          warmup_epochs=50,
          learning_rate_drop_interval=50,
          learning_rate_drop_factor=0.25,
          checkpoint_interval=None):

    # Setup input data
    input = lbann.Input(target_mode='classification')
    images = lbann.Identity(input)
    labels = lbann.Identity(input)

    # Classification network
    head_cnn = modules.ResNet(bn_statistics_group_size=bn_statistics_group_size)
    class_fc = lbann.modules.FullyConnectedModule(num_labels,
                                                  activation=lbann.Softmax,
                                                  name=f'{name}_fc',
                                                  data_layout=fc_data_layout)
    x = head_cnn(images)
    probs = class_fc(x)

    # Setup objective function
    cross_entropy = lbann.CrossEntropy([probs, labels])
    l2_reg_weights = set()
    for l in lbann.traverse_layer_graph(input):
        if type(l) == lbann.Convolution or type(l) == lbann.FullyConnected:
            l2_reg_weights.update(l.weights)
    l2_reg = lbann.L2WeightRegularization(weights=l2_reg_weights, scale=0.0002)
    obj = lbann.ObjectiveFunction([cross_entropy, l2_reg])

    # Setup model
    metrics = [lbann.Metric(lbann.CategoricalAccuracy([probs, labels]),
                            name='accuracy', unit='%')]
    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]
    if checkpoint_interval:
        callbacks.append(
            lbann.CallbackCheckpoint(
                checkpoint_dir='ckpt',
                checkpoint_epochs=checkpoint_interval,
            )
        )

    # Learning rate schedules
    if warmup_epochs:
        callbacks.append(
            lbann.CallbackLinearGrowthLearningRate(
                target=learning_rate * mini_batch_size / 128,
                num_epochs=warmup_epochs,
            )
        )
    if learning_rate_drop_factor:
        callbacks.append(
            lbann.CallbackDropFixedLearningRate(
                drop_epoch=list(range(0, num_epochs, learning_rate_drop_interval)),
                amt=learning_rate_drop_factor,
            )
        )

    # Construct model
    model = lbann.Model(num_epochs,
                        layers=lbann.traverse_layer_graph(input),
                        objective_function=obj,
                        metrics=metrics,
                        callbacks=callbacks)

    # Setup optimizer
    # opt = lbann.Adam(learn_rate=learning_rate, beta1=0.9, beta2=0.999, eps=1e-8)
    opt = lbann.SGD(learn_rate=learning_rate, momentum=0.9)

    # Load data reader from prototext
    data_reader_proto = lbann.lbann_pb2.LbannPB()
    with open(data_reader_file, 'r') as f:
        google.protobuf.text_format.Merge(f.read(), data_reader_proto)
    data_reader_proto = data_reader_proto.data_reader
    for reader_proto in data_reader_proto.reader:
        reader_proto.python.module_dir = os.path.dirname(os.path.realpath(__file__))

    # Return experiment objects
    return model, data_reader_proto, opt
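# A hedged usage sketch: build the experiment objects and hand them to
# LBANN's launcher. The prototext path and job name are hypothetical:
#
#   model, data_reader, opt = setup(
#       data_reader_file='data_reader.prototext',
#       num_labels=200,
#       mini_batch_size=128,
#   )
#   trainer = lbann.Trainer(mini_batch_size=128)
#   lbann.contrib.launcher.run(trainer, model, data_reader, opt,
#                              job_name='lbann_classifier')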
# Setup model
mini_batch_size = 64
num_epochs = 20
model = lbann.Model(num_epochs,
                    layers=lbann.traverse_layer_graph(input_),
                    objective_function=loss,
                    metrics=[lbann.Metric(acc, name='accuracy', unit='%')],
                    callbacks=[
                        lbann.CallbackPrintModelDescription(),
                        lbann.CallbackPrint(),
                        lbann.CallbackTimer()
                    ])

# Setup optimizer
opt = lbann.SGD(learn_rate=0.01, momentum=0.9)

# Setup data reader
data_reader = data.mnist.make_data_reader()

# Setup trainer
trainer = lbann.Trainer(mini_batch_size=mini_batch_size)

# ----------------------------------
# Run experiment
# ----------------------------------
kwargs = lbann.contrib.args.get_scheduler_kwargs(args)
lbann.contrib.launcher.run(trainer, model, data_reader, opt,
                           **kwargs)
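# This excerpt assumes input_, loss, and acc were built earlier in the
# script. A plausible construction in the style of LBANN's MNIST examples
# (the layer sizes here are illustrative, not necessarily the original's):
#
#   input_ = lbann.Input(target_mode='classification')
#   images = lbann.Identity(input_)
#   labels = lbann.Identity(input_)
#   x = lbann.Relu(lbann.FullyConnected(images, num_neurons=500))
#   probs = lbann.Softmax(lbann.FullyConnected(x, num_neurons=10))
#   loss = lbann.CrossEntropy([probs, labels])
#   acc = lbann.CategoricalAccuracy([probs, labels])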
_reader.python.module = 'dataset'
_reader.python.module_dir = os.path.dirname(os.path.realpath(__file__))
_reader.python.sample_function = 'get_sample'
_reader.python.num_samples_function = 'num_samples'
_reader.python.sample_dims_function = 'sample_dims'

# ----------------------------------
# Run LBANN
# ----------------------------------

# Create optimizer
# Note: Learning rate in original word2vec is 0.025
learning_rate = args.learning_rate
if learning_rate < 0:
    learning_rate = 0.025 * args.mini_batch_size
opt = lbann.SGD(learn_rate=learning_rate)

# Create LBANN objects
trainer = lbann.Trainer(mini_batch_size=args.mini_batch_size)
callbacks = [
    lbann.CallbackPrint(),
    lbann.CallbackTimer(),
    lbann.CallbackDumpWeights(basename='embeddings',
                              epoch_interval=args.num_epochs),
]
model = lbann.Model(args.num_epochs,
                    layers=lbann.traverse_layer_graph(input_),
                    objective_function=obj,
                    callbacks=callbacks)

# Run LBANN
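# The fields above point at a companion dataset.py module. A minimal sketch
# of the protocol LBANN's Python data reader expects; the sample contents
# below are placeholders, not the real corpus:
#
#   # dataset.py
#   import numpy as np
#
#   _samples = np.random.rand(1000, 16).astype(np.float32)
#
#   def get_sample(index):
#       return _samples[index]
#
#   def num_samples():
#       return _samples.shape[0]
#
#   def sample_dims():
#       return (_samples.shape[1],)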
    lbann.Reshape(lbann_scales, dims=str_list([-1, 1])))

# Construct LBANN model with metric checking and gradient checking
metric = lbann.Metric(lbann_z, name='metric')
callbacks = [
    lbann.CallbackCheckMetric(
        metric=metric.name,
        lower_bound=np_z - tol,
        upper_bound=np_z + tol,
        error_on_failure=True,
        execution_modes='test',
    ),
    lbann.CallbackCheckGradients(error_on_failure=True),
]
model = lbann.Model(
    epochs=0,
    layers=lbann.traverse_layer_graph([input_, lbann_x]),
    objective_function=lbann_z,
    metrics=metric,
    callbacks=callbacks,
)

# Run LBANN
lbann.run(
    trainer=lbann.Trainer(mini_batch_size=1),
    model=model,
    data_reader=reader,
    optimizer=lbann.SGD(),
    job_name='lbann_fftshift_test',
)
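# The excerpt does not show how np_z and tol are computed. A plausible
# NumPy reference in the style of LBANN's layer tests (the reduction and
# tolerance formula are assumptions, and np_x stands for the input sample):
#
#   np_y = np.fft.fftshift(np_x, axes=(-2, -1))
#   np_z = float(np.abs(np_y).sum())
#   tol = 8 * np.abs(np_z) * np.finfo(np.float32).eps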
def setup(num_patches=3,
          mini_batch_size=512,
          num_epochs=75,
          learning_rate=0.005,
          bn_statistics_group_size=2,
          fc_data_layout='model_parallel',
          warmup=True,
          checkpoint_interval=None):

    # Data dimensions
    patch_dims = patch_generator.patch_dims
    num_labels = patch_generator.num_labels(num_patches)

    # Extract tensors from data sample
    input = lbann.Input()
    patch_size = functools.reduce(operator.mul, patch_dims)
    slice_points = [0]
    for _ in range(num_patches):
        slice_points.append(slice_points[-1] + patch_size)
    slice_points.append(slice_points[-1] + num_labels)
    sample = lbann.Slice(input, slice_points=str_list(slice_points))
    patches = [
        lbann.Reshape(sample, dims=str_list(patch_dims))
        for _ in range(num_patches)
    ]
    labels = lbann.Identity(sample)

    # Siamese network
    head_cnn = modules.ResNet(
        bn_statistics_group_size=bn_statistics_group_size)
    heads = [head_cnn(patch) for patch in patches]
    heads_concat = lbann.Concatenation(heads)

    # Classification network
    class_fc1 = modules.FcBnRelu(
        4096,
        statistics_group_size=bn_statistics_group_size,
        name='siamese_class_fc1',
        data_layout=fc_data_layout)
    class_fc2 = modules.FcBnRelu(
        4096,
        statistics_group_size=bn_statistics_group_size,
        name='siamese_class_fc2',
        data_layout=fc_data_layout)
    class_fc3 = lbann.modules.FullyConnectedModule(
        num_labels,
        activation=lbann.Softmax,
        name='siamese_class_fc3',
        data_layout=fc_data_layout)
    x = class_fc1(heads_concat)
    x = class_fc2(x)
    probs = class_fc3(x)

    # Setup objective function
    cross_entropy = lbann.CrossEntropy([probs, labels])
    l2_reg_weights = set()
    for l in lbann.traverse_layer_graph(input):
        if type(l) == lbann.Convolution or type(l) == lbann.FullyConnected:
            l2_reg_weights.update(l.weights)
    l2_reg = lbann.L2WeightRegularization(weights=l2_reg_weights, scale=0.0002)
    obj = lbann.ObjectiveFunction([cross_entropy, l2_reg])

    # Setup model
    metrics = [
        lbann.Metric(lbann.CategoricalAccuracy([probs, labels]),
                     name='accuracy', unit='%')
    ]
    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]
    if checkpoint_interval:
        callbacks.append(
            lbann.CallbackCheckpoint(checkpoint_dir='ckpt',
                                     checkpoint_epochs=checkpoint_interval))

    # Learning rate schedules
    if warmup:
        callbacks.append(
            lbann.CallbackLinearGrowthLearningRate(
                target=learning_rate * mini_batch_size / 128,
                num_epochs=5))
    callbacks.append(
        lbann.CallbackDropFixedLearningRate(
            drop_epoch=list(range(0, 100, 15)),
            amt=0.25))

    # Construct model
    model = lbann.Model(num_epochs,
                        layers=lbann.traverse_layer_graph(input),
                        objective_function=obj,
                        metrics=metrics,
                        callbacks=callbacks)

    # Setup optimizer
    opt = lbann.SGD(learn_rate=learning_rate, momentum=0.9)
    # opt = lbann.Adam(learn_rate=learning_rate, beta1=0.9, beta2=0.999, eps=1e-8)

    # Setup data reader
    data_reader = make_data_reader(num_patches)

    # Return experiment objects
    return model, data_reader, opt
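# str_list (used above for slice_points and dims) is presumably LBANN's
# helper that joins values into the space-separated strings its prototext
# fields expect; a minimal equivalent:
#
#   def str_list(values):
#       return ' '.join(str(v) for v in values)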
    model = Sparse_Graph_Trainer.make_model(dataset='PROTEINS',
                                            kernel_type='GatedGraph',
                                            num_epochs=num_epochs)
elif model_arch == 'DGCN':
    model = Dense_Graph_Trainer.make_model(dataset='PROTEINS',
                                           kernel_type='GCN',
                                           num_epochs=num_epochs)
elif model_arch == 'DGRAPH':
    model = Dense_Graph_Trainer.make_model(dataset='PROTEINS',
                                           kernel_type='Graph',
                                           num_epochs=num_epochs)
else:
    model = Sparse_Graph_Trainer.make_model(dataset='PROTEINS',
                                            kernel_type='GCN',
                                            num_epochs=num_epochs)

optimizer = lbann.SGD(learn_rate=1e-3)

# TODO: Add logic for choosing a dataset
data_reader = data.PROTEINS.make_data_reader()

trainer = lbann.Trainer(mini_batch_size=mini_batch_size)
lbann.contrib.launcher.run(trainer,
                           model,
                           data_reader,
                           optimizer,
                           job_name=job_name,
                           **kwargs)
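# A sketch of the argument handling this branch chain assumes; the flag
# name is hypothetical, but the choices mirror the branches above:
#
#   import argparse
#   parser = argparse.ArgumentParser()
#   parser.add_argument('--model', dest='model_arch', default='GCN',
#                       choices=['GCN', 'GATEDGRAPH', 'DGCN', 'DGRAPH'],
#                       help='graph kernel to train on PROTEINS')
#   args = parser.parse_args()
#   model_arch = args.model_arch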
learn_rate = 0.025

# ----------------------------------
# Construct LBANN objects
# ----------------------------------
trainer = lbann.Trainer(
    mini_batch_size=args.mini_batch_size,
    num_parallel_readers=0,
)
model_ = model.make_model(
    data_dim,
    latent_dim,
    args.num_epochs,
)
optimizer = lbann.SGD(learn_rate=learn_rate, momentum=0.9)
data_reader = data.make_data_reader()

# ----------------------------------
# Run
# ----------------------------------
kwargs = lbann.contrib.args.get_scheduler_kwargs(args)
lbann.contrib.launcher.run(
    trainer,
    model_,
    data_reader,
    optimizer,
    job_name='lbann_motif',
    work_dir=args.work_dir,
    batch_job=args.batch_job,
)
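# A sketch of the argument parsing this script presumably does earlier,
# using LBANN's scheduler-argument helper; the flag names and defaults are
# assumptions:
#
#   import argparse
#   import lbann.contrib.args
#
#   parser = argparse.ArgumentParser()
#   lbann.contrib.args.add_scheduler_arguments(parser)
#   parser.add_argument('--mini-batch-size', type=int, default=256)
#   parser.add_argument('--num-epochs', type=int, default=100)
#   parser.add_argument('--work-dir', default=None)
#   parser.add_argument('--batch-job', action='store_true')
#   args = parser.parse_args()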