Example #1
0
def run_experiment(args,
                   model,
                   data_reader_proto,
                   optimizer):
    """Submit the DenseNet job through the LC launcher.

    Nothing happens when ``args.disable_run`` is truthy. Otherwise every
    scheduler option that is set (truthy) on ``args`` is forwarded as a
    keyword argument to ``lbann.contrib.lc.launcher.run``. When
    ``args.imagenet_classes`` is set, the ImageNet train/val directories
    and label files for that class count are passed via ``lbann_args``.

    Args:
        args: Options object; reads ``disable_run``, ``nodes``,
            ``procs_per_node``, ``partition``, ``account``,
            ``time_limit`` and ``imagenet_classes``.
        model: Forwarded unchanged to the launcher.
        data_reader_proto: Forwarded unchanged to the launcher.
        optimizer: Forwarded unchanged to the launcher.
    """
    # Guard clause: skip everything when running is disabled
    if args.disable_run:
        return

    # Imports are deferred to this point, so they only happen when a run
    # is actually requested
    from lbann.contrib.lc.paths import imagenet_dir, imagenet_labels
    import lbann.contrib.lc.launcher

    # Forward only the scheduler options that were actually provided
    launch_options = {}
    for option in ('nodes', 'procs_per_node', 'partition',
                   'account', 'time_limit'):
        value = getattr(args, option)
        if value:
            launch_options[option] = value

    # Override the data paths when a class count was requested
    num_classes = args.imagenet_classes
    if num_classes:
        train_dir = imagenet_dir(data_set='train', num_classes=num_classes)
        train_labels = imagenet_labels(data_set='train',
                                       num_classes=num_classes)
        val_dir = imagenet_dir(data_set='val', num_classes=num_classes)
        val_labels = imagenet_labels(data_set='val',
                                     num_classes=num_classes)
        path_flags = (
            '--data_filedir_train={} --data_filename_train={} '
            '--data_filedir_test={} --data_filename_test={}'
        )
        launch_options['lbann_args'] = path_flags.format(
            train_dir, train_labels, val_dir, val_labels)

    lbann.contrib.lc.launcher.run(model,
                                  data_reader_proto,
                                  optimizer,
                                  job_name='lbann_densenet',
                                  **launch_options)
Example #2
0
def make_data_reader(num_classes=1000):
    """Build the ImageNet data reader message with site-specific paths.

    Parses ``data_reader.prototext`` from the directory containing this
    file, then writes the train paths into ``reader[0]`` and the test
    paths into ``reader[1]``, using the path helpers of the current
    compute center.

    Args:
        num_classes: Class count passed to the path helpers. Only used
            on the 'lc' compute center; the 'nersc' helpers are called
            without it.

    Returns:
        The ``data_reader`` field of the parsed message, with data
        directories and label files filled in.

    Raises:
        RuntimeError: The compute center is neither 'lc' nor 'nersc'.
        FileNotFoundError: A data directory or label file is not
            accessible.
    """

    # Parse the prototext file that sits next to this script
    script_dir = os.path.dirname(os.path.realpath(__file__))
    proto_path = os.path.join(script_dir, 'data_reader.prototext')
    parsed = lbann.lbann_pb2.LbannPB()
    with open(proto_path, 'r') as proto_stream:
        google.protobuf.text_format.Merge(proto_stream.read(), parsed)
    reader_msg = parsed.data_reader

    # Pick the path helpers for this compute center
    # Note: Paths are only known for some compute centers
    compute_center = lbann.contrib.launcher.compute_center()
    if compute_center == 'lc':
        from lbann.contrib.lc.paths import imagenet_dir, imagenet_labels
        lookup_kwargs = {'num_classes': num_classes}
    elif compute_center == 'nersc':
        from lbann.contrib.nersc.paths import imagenet_dir, imagenet_labels
        lookup_kwargs = {}
    else:
        raise RuntimeError(
            f'ImageNet data paths are unknown for current compute center ({compute_center})'
        )

    # Resolve the four paths (same lookup order on every site)
    train_dir = imagenet_dir(data_set='train', **lookup_kwargs)
    train_labels = imagenet_labels(data_set='train', **lookup_kwargs)
    test_dir = imagenet_dir(data_set='val', **lookup_kwargs)
    test_labels = imagenet_labels(data_set='val', **lookup_kwargs)

    # Fail on the first path that is not accessible
    expectations = (
        (train_dir, os.path.isdir),
        (train_labels, os.path.isfile),
        (test_dir, os.path.isdir),
        (test_labels, os.path.isfile),
    )
    for path, is_accessible in expectations:
        if not is_accessible(path):
            raise FileNotFoundError('could not access {}'.format(path))

    # Write the paths into the train and test readers
    train_reader = reader_msg.reader[0]
    train_reader.data_filedir = train_dir
    train_reader.data_filename = train_labels
    test_reader = reader_msg.reader[1]
    test_reader.data_filedir = test_dir
    test_reader.data_filename = test_labels

    return reader_msg
Example #3
0
# Parse the data reader prototext named by args.data_reader and keep only
# the data_reader field of the resulting message
data_reader_proto = lbann.lbann_pb2.LbannPB()
with open(args.data_reader, 'r') as reader_file:
    reader_text = reader_file.read()
    txtf.Merge(reader_text, data_reader_proto)
data_reader_proto = data_reader_proto.data_reader

if args.prototext:
    # Save prototext instead of launching a job
    lbann.proto.save_prototext(
        args.prototext,
        model=model,
        optimizer=opt,
        data_reader=data_reader_proto,
    )
else:
    # Run experiment
    from lbann.contrib.lc.paths import imagenet_dir, imagenet_labels
    import lbann.contrib.lc.launcher
    kwargs = lbann.contrib.args.get_scheduler_kwargs(args)

    # Point the job at the ImageNet subset matching the label count
    classes = args.num_labels
    train_dir = imagenet_dir(data_set='train', num_classes=classes)
    train_labels = imagenet_labels(data_set='train', num_classes=classes)
    val_dir = imagenet_dir(data_set='val', num_classes=classes)
    val_labels = imagenet_labels(data_set='val', num_classes=classes)
    path_flags = (
        '--data_filedir_train={} --data_filename_train={} '
        '--data_filedir_test={} --data_filename_test={}'
    )
    kwargs['lbann_args'] = path_flags.format(
        train_dir, train_labels, val_dir, val_labels)

    lbann.contrib.lc.launcher.run(
        model,
        data_reader_proto,
        opt,
        job_name='lbann_alexnet',
        **kwargs)