Example #1
def main(args):
    # Fix random seeds and threads
    np.random.seed(args.seed)
    tf.random.set_seed(args.seed)
    tf.config.threading.set_inter_op_parallelism_threads(args.threads)
    tf.config.threading.set_intra_op_parallelism_threads(args.threads)

    # Create logdir name
    args.logdir = os.path.join("logs", "{}-{}-{}".format(
        os.path.basename(globals().get("__file__", "notebook")),
        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
        ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value) for key, value in sorted(vars(args).items())))
    ))

    # Load data
    cifar = CIFAR10()

    # TODO: Create the model and train it
    model = ...
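    # A minimal sketch of one way to fill in the TODO above (not the original
    # solution). It assumes the course CIFAR10 module exposes H/W/C/LABELS and a
    # `dev` split, as in the later examples, and that `args` has an `epochs` field.
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, padding="same", activation="relu",
                               input_shape=[CIFAR10.H, CIFAR10.W, CIFAR10.C]),
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Conv2D(64, 3, padding="same", activation="relu"),
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(CIFAR10.LABELS, activation="softmax"),
    ])
    model.compile(
        optimizer=tf.optimizers.Adam(),
        loss=tf.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.metrics.SparseCategoricalAccuracy(name="accuracy")],
    )
    model.fit(
        cifar.train.data["images"], cifar.train.data["labels"],
        batch_size=args.batch_size, epochs=args.epochs,
        validation_data=(cifar.dev.data["images"], cifar.dev.data["labels"]),
    )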

    # Generate test set annotations, but in args.logdir to allow parallel execution.
    os.makedirs(args.logdir, exist_ok=True)
    with open(os.path.join(args.logdir, "cifar_competition_test.txt"), "w", encoding="utf-8") as predictions_file:
        for probs in model.predict(cifar.test.data["images"], batch_size=args.batch_size):
            print(np.argmax(probs), file=predictions_file)
Example #2
def main(_):
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_index

    # Evaluation optimizers and dropout
    optimizer_options = [
        'SGDNesterov', 'Adagrad', 'RMSProp', 'AdaDelta', 'Adam'
    ]
    dropout_options = [False, True]

    # Initialize model and log folders
    if FLAGS.load_model is None:
        cur_time = datetime.now().strftime("%Y%m%d-%H%M")
    else:
        cur_time = FLAGS.load_model

    model_dir, log_dir = make_folders(is_train=FLAGS.is_train,
                                      base=FLAGS.model,
                                      cur_time=cur_time)
    init_logger(log_dir=log_dir, is_train=FLAGS.is_train)

    if FLAGS.model.lower() in ('logistic', 'neural_network'):
        # Initialize MNIST dataset and print info
        data = MNIST(log_dir=log_dir)
        data.info(use_logging=FLAGS.is_train, show_img=False)  # print basic information
    elif FLAGS.model.lower() == 'cnn':
        # Initialize CIFAR10 dataset and print info
        data = CIFAR10(log_dir=log_dir, is_train=FLAGS.is_train)
        data.info(use_logging=FLAGS.is_train, show_img=False, smooth=True)
        # Data preprocessing: whitening or mean subtraction
        data.preprocessing(use_whiten=FLAGS.is_whiten)
    else:
        raise NotImplementedError

    if FLAGS.is_train:
        train(data, optimizer_options, dropout_options, model_dir, log_dir)
    else:
        test(data, optimizer_options, dropout_options, model_dir, log_dir)
Example #3
File: data.py  Project: ami-GS/ngraph
def ingest_cifar10(root_dir, padded_size=32, overwrite=False):
    '''
    Save CIFAR-10 dataset as PNG files
    '''
    out_dir = os.path.join(root_dir, 'cifar10')

    set_names = ('train', 'valid')
    manifest_files = [
        os.path.join(out_dir, setn + '-index.csv') for setn in set_names
    ]

    if (all([os.path.exists(manifest) for manifest in manifest_files])
            and not overwrite):
        return manifest_files

    datasets = CIFAR10(out_dir).load_data()

    pad_size = (padded_size - 32) // 2 if padded_size > 32 else 0
    pad_width = ((0, 0), (pad_size, pad_size), (pad_size, pad_size))

    # Now write out image files and manifests
    for setn, manifest, data in zip(set_names, manifest_files, datasets):
        records = [('@FILE', 'STRING')]
        img_path = os.path.join(out_dir, setn)
        if not os.path.isdir(img_path):
            os.makedirs(img_path)

        for idx, (img, lbl) in enumerate(
                tqdm(zip(data['image']['data'], data['label']['data']))):
            im = np.pad(img.reshape((3, 32, 32)), pad_width, mode='mean')
            im = Image.fromarray(
                np.uint8(np.transpose(im, axes=[1, 2, 0]).copy()))
            fname = os.path.join(img_path, '{}_{:05d}.png'.format(lbl, idx))
            im.save(fname, format='PNG')
            records.append((os.path.relpath(fname, out_dir), lbl))
        np.savetxt(manifest, records, fmt='%s\t%s')

    return manifest_files
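# A hypothetical usage example (the path below is illustrative only): write
# padded 40x40 PNGs and retrieve the train/valid manifest paths.
# train_manifest, valid_manifest = ingest_cifar10('/tmp/data', padded_size=40)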
Example #4
    transforms.ToTensor(),
    transforms.Normalize(cifar10_mean_color, cifar10_std_color),
])

train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(cifar10_mean_color, cifar10_std_color),
])
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar10_mean_color, cifar10_std_color)
])
# Datasets
train_dataset = CIFAR10(args.cifar10_dir,
                        split='train',
                        download=True,
                        transform=transform)
val_dataset = CIFAR10(args.cifar10_dir,
                      split='val',
                      download=True,
                      transform=train_transform)
test_dataset = CIFAR10(args.cifar10_dir,
                       split='test',
                       download=True,
                       transform=transform)

# Data augmentation

# DataLoaders
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=args.batch_size,
Example #5
parser.add_argument('--ghostsize', type=int, default=100)
parser.add_argument('--lr', type=float, default=0.1)
parser.add_argument('--momentum', type=float, default=0.9)
parser.add_argument('--weightdecay', type=float, default=5e-4)
parser.add_argument('--aug', action='store_true', default=False)
parser.add_argument('--model', type=str, default='vgg')
parser.add_argument('--resume', type=str, default=None)
parser.add_argument('--datadir', type=str, default='/mnt/home/haoyi/jingfengwu/datasets/CIFAR10/numpy')
parser.add_argument('--logdir', type=str, default='logs/SGD')

args = parser.parse_args()
logger = LogSaver(args.logdir)
logger.save(str(args), 'args')

# data
dataset = CIFAR10(args.datadir)
logger.save(str(dataset), 'dataset')
test_list = dataset.getTestList(500, True)

# model
start_iter = 0
lr = args.lr
if args.model == 'resnet':
    from resnet import ResNet18
    model = ResNet18().cuda()
elif args.model == 'vgg':
    from vgg import vgg11
    model = vgg11().cuda()
else:
    raise NotImplementedError()
criterion = torch.nn.CrossEntropyLoss().cuda()
Example #6
def main(args):
    # Note: this script was run on Google Colab as follows:
    # with tf.device('/device:GPU:0'):

    # Fix random seeds and threads
    np.random.seed(args.seed)
    tf.random.set_seed(args.seed)
    tf.config.threading.set_inter_op_parallelism_threads(args.threads)
    tf.config.threading.set_intra_op_parallelism_threads(args.threads)

    # Create logdir name
    args.logdir = os.path.join(
        "logs", "{}-{}-{}".format(
            os.path.basename(globals().get("__file__", "notebook")),
            datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), ",".join(
                ("{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value)
                 for key, value in sorted(vars(args).items())))))

    # Load data
    cifar = CIFAR10()

    # IMAGE AUGMENTATION SETUP
    train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True)

    # RENAME DATASETS FOR CONVENIENCE
    train_images = cifar.train.data["images"]
    train_labels = cifar.train.data["labels"]
    dev_images = cifar.dev.data["images"]
    dev_labels = cifar.dev.data["labels"]
    test_images = cifar.test.data["images"]
    train_images = train_images.astype('float32')
    dev_images = dev_images.astype('float32')
    test_images = test_images.astype('float32')

    # STANDARDIZE THE DATA
    mean = np.mean(train_images, axis=(0, 1, 2, 3))
    std = np.std(train_images, axis=(0, 1, 2, 3))

    train_images = (train_images - mean) / (std + 1e-7)
    dev_images = (dev_images - mean) / (std + 1e-7)
    test_images = (test_images - mean) / (std + 1e-7)

    # ------------------------------------------------------------------------------------------------------------------
    # -------------------------------------------- ENSEMBLE MODEL ------------------------------------------------------
    # ------------------------------------------------------------------------------------------------------------------
    '''models = []

    for model_x in range(args.models):
        np.random.seed(args.seed + model_x)
        tf.random.set_seed(args.seed + model_x)

        # Create logdir name
        args.logdir = os.path.join("logs", "{}-{}-{}".format(
            os.path.basename(globals().get("__file__", "notebook")),
            datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
            ",".join(
                ("{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value) for key, value in
                 sorted(vars(args).items())))
        ))

        dropout_rate = 0.2
        l2_rate = 1e-4

        # CREATE A MODEL
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu',
                                         kernel_regularizer=tf.keras.regularizers.l2(l2_rate),
                                         input_shape=(32, 32, 3)))
        model.add(tf.keras.layers.Activation('relu'))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu',
                                         kernel_regularizer=tf.keras.regularizers.l2(l2_rate)))
        model.add(tf.keras.layers.Activation('relu'))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.MaxPool2D(pool_size=(2, 2)))
        model.add(tf.keras.layers.Dropout(dropout_rate))

        model.add(tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu',
                                         kernel_regularizer=tf.keras.regularizers.l2(l2_rate)))
        model.add(tf.keras.layers.Activation('relu'))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu',
                                         kernel_regularizer=tf.keras.regularizers.l2(l2_rate)))
        model.add(tf.keras.layers.Activation('relu'))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.MaxPool2D(pool_size=(2, 2)))
        model.add(tf.keras.layers.Dropout(dropout_rate + 0.1))

        model.add(tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu',
                                         kernel_regularizer=tf.keras.regularizers.l2(l2_rate)))
        model.add(tf.keras.layers.Activation('relu'))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu',
                                         kernel_regularizer=tf.keras.regularizers.l2(l2_rate)))
        model.add(tf.keras.layers.Activation('relu'))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.MaxPool2D(pool_size=(2, 2)))
        model.add(tf.keras.layers.Dropout(dropout_rate + 0.2))

        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dropout(dropout_rate + 0.3))
        model.add(tf.keras.layers.Dense(10, activation='softmax'))

        # APPEND CREATED MODEL INTO LIST
        models.append(model)

        # COMPILE  MODEL INSIDE LIST
        models[-1].compile(
            optimizer=tf.keras.optimizers.RMSprop(lr=0.001, decay=1e-6),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy")],
        )

        # SET TensorBoard
        tb_callback = tf.keras.callbacks.TensorBoard(args.logdir, histogram_freq=1, update_freq=100,
                                                     profile_batch=0)

        # FIT MODEL INSIDE LIST
        models[-1].fit(
            train_generator.flow(tf.reshape(train_images, [-1, 32, 32, 3]), train_labels, seed=args.seed,
                                 batch_size=args.batch_size), shuffle=False,
            epochs=args.epochs, steps_per_epoch=train_images.shape[0] // args.batch_size,
            validation_data=(dev_images, dev_labels),
            callbacks=[tb_callback], verbose=2
        )

        # SAVE MODEL
        models[-1].save('cifar_' + str(model_x) + '.h5')'''

    # ------------------------------------------------------------------------------------------------------------------
    # ------------------------------------------- TEST SAVED MODELS ----------------------------------------------------
    # ------------------------------------------------------------------------------------------------------------------
    # LOAD SAVED MODELS INTO LIST
    cifar_0 = tf.keras.models.load_model('cifar_0.h5')
    cifar_1 = tf.keras.models.load_model('cifar_1.h5')
    cifar_2 = tf.keras.models.load_model('cifar_2.h5')
    pred_models = [cifar_0, cifar_1, cifar_2]

    # ENSEMBLE THE PREDICTIONS
    y_list = [
        pred_models[m].predict(test_images, batch_size=64)
        for m in range(args.models)
    ]
    y_list = sum(y_list) / len(y_list)

    # Generate test set annotations.
    with open("cifar_competition_test.txt", "w",
              encoding="utf-8") as predictions_file:
        for probs in y_list:
            print(np.argmax(probs), file=predictions_file)
Example #7
args = parser.parse_args()
start_time = datetime.now()

logger = LogSaver(args.logdir)
logger.save(str(args), 'args')
logger.save(str(start_time), 'start time')

# device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logger.save(str(device), 'device')

# data
HOME = os.environ['HOME']
datadir = os.path.join(HOME, 'data/datasets/CIFAR10/numpy')
dataset = CIFAR10(datadir, device)
logger.save(str(dataset), 'dataset')
testloader = dataset.getTestList(500)

# model
start_epoch = 0
lr = args.lr
from resnet import ResNet18
model = ResNet18().to(device)
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(),
                            lr=lr,
                            momentum=args.momentum,
                            weight_decay=args.weightdecay)
if args.resume:
    checkpoint = torch.load(args.resume)
Example #8
def main(args):
    # Fix random seeds and threads
    np.random.seed(args.seed)
    tf.random.set_seed(args.seed)
    tf.config.threading.set_inter_op_parallelism_threads(args.threads)
    tf.config.threading.set_intra_op_parallelism_threads(args.threads)
    if args.recodex:
        tf.keras.utils.get_custom_objects()["glorot_uniform"] = tf.initializers.GlorotUniform(seed=args.seed)
        tf.keras.utils.get_custom_objects()["orthogonal"] = tf.initializers.Orthogonal(seed=args.seed)
        tf.keras.utils.get_custom_objects()["uniform"] = tf.initializers.RandomUniform(seed=args.seed)

    # Create logdir name
    args.logdir = os.path.join(
        "logs", "{}-{}-{}".format(
            os.path.basename(globals().get("__file__", "notebook")),
            datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), ",".join(
                ("{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value)
                 for key, value in sorted(vars(args).items())))))

    # Load the data
    cifar = CIFAR10(size={"dev": 1000})

    # Create the model
    inputs = tf.keras.layers.Input(shape=[CIFAR10.H, CIFAR10.W, CIFAR10.C])
    hidden = tf.keras.layers.Conv2D(16, 3, 2, "same",
                                    activation=tf.nn.relu)(inputs)
    hidden = tf.keras.layers.Conv2D(16, 3, 1, "same",
                                    activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Conv2D(24, 3, 2, "same",
                                    activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Conv2D(24, 3, 1, "same",
                                    activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Conv2D(32, 3, 2, "same",
                                    activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Conv2D(32, 3, 1, "same",
                                    activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Flatten()(hidden)
    hidden = tf.keras.layers.Dense(200, activation=tf.nn.relu)(hidden)
    outputs = tf.keras.layers.Dense(CIFAR10.LABELS,
                                    activation=tf.nn.softmax)(hidden)

    # Train the model
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=tf.optimizers.Adam(),
        loss=tf.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.metrics.SparseCategoricalAccuracy(name="accuracy")],
    )
    tb_callback = tf.keras.callbacks.TensorBoard(args.logdir,
                                                 histogram_freq=1,
                                                 update_freq=100,
                                                 profile_batch=0)

    # TODO: Create `train` and `dev` datasets by using
    # `tf.data.Dataset.from_tensor_slices` on cifar.train and cifar.dev.
    # The structure of a single example is inferred from the argument
    # of `from_tensor_slices` -- in our case we want each example to
    # be a pair of `(input_image, target_label)`, so we need to pass
    # a pair `(data["images"], data["labels"])` to `from_tensor_slices`.
    train = ...
    dev = ...
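    # A sketch of the datasets described in the TODO above (not the reference
    # solution); the names `train_dataset`/`dev_dataset` are introduced here
    # only for illustration, leaving the `...` placeholders untouched.
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (cifar.train.data["images"], cifar.train.data["labels"]))
    dev_dataset = tf.data.Dataset.from_tensor_slices(
        (cifar.dev.data["images"], cifar.dev.data["labels"]))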

    # Simple data augmentation
    generator = tf.random.Generator.from_seed(args.seed)

    def train_augment(image, label):
        if generator.uniform([]) >= 0.5:
            image = tf.image.flip_left_right(image)
        image = tf.image.resize_with_crop_or_pad(image, CIFAR10.H + 6,
                                                 CIFAR10.W + 6)
        image = tf.image.resize(image, [
            generator.uniform(
                [], minval=CIFAR10.H, maxval=CIFAR10.H + 12, dtype=tf.int32),
            generator.uniform(
                [], minval=CIFAR10.W, maxval=CIFAR10.W + 12, dtype=tf.int32)
        ])
        image = tf.image.crop_to_bounding_box(
            image,
            target_height=CIFAR10.H,
            target_width=CIFAR10.W,
            offset_height=generator.uniform([],
                                            maxval=tf.shape(image)[0] -
                                            CIFAR10.H + 1,
                                            dtype=tf.int32),
            offset_width=generator.uniform([],
                                           maxval=tf.shape(image)[1] -
                                           CIFAR10.W + 1,
                                           dtype=tf.int32),
        )
        return image, label

    # TODO: Now prepare the training pipeline.
    # - first use `.take(5000)` method to utilize only the first 5000 examples
    # - call `.shuffle(5000, seed=args.seed)` to shuffle the data using
    #   the given seed and a buffer of the size of the whole data
    # - call `.map(train_augment)` to perform the dataset augmentation
    # - finally call `.batch(args.batch_size)` to generate batches
    # - optionally, you might want to add `.prefetch(tf.data.AUTOTUNE)` as
    #   the last call -- it allows the pipeline to run in parallel with
    #   the training process, dynamically adjusting the number of threads
    #   to fully saturate the training process
    train = ...
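    # A sketch of the pipeline listed above, applied to the illustrative
    # `train_dataset` built earlier (again leaving the `...` placeholder as-is):
    train = (train_dataset
             .take(5000)
             .shuffle(5000, seed=args.seed)
             .map(train_augment)
             .batch(args.batch_size)
             .prefetch(tf.data.AUTOTUNE))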

    # TODO: Prepare the `dev` pipeline
    # - just use `.batch(args.batch_size)` to generate batches
    dev = ...
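    # And a sketch of the `dev` pipeline: batching only.
    dev = dev_dataset.batch(args.batch_size)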

    # Train
    logs = model.fit(train,
                     epochs=args.epochs,
                     validation_data=dev,
                     callbacks=[tb_callback])

    # Return dev set accuracy
    return logs.history["val_accuracy"][-1]
Example #9
def main(args):
    # Fix random seeds and threads
    np.random.seed(args.seed)
    tf.random.set_seed(args.seed)
    tf.config.threading.set_inter_op_parallelism_threads(args.threads)
    tf.config.threading.set_intra_op_parallelism_threads(args.threads)
    if args.recodex:
        tf.keras.utils.get_custom_objects()["glorot_uniform"] = tf.initializers.GlorotUniform(seed=args.seed)
        tf.keras.utils.get_custom_objects()["orthogonal"] = tf.initializers.Orthogonal(seed=args.seed)
        tf.keras.utils.get_custom_objects()["uniform"] = tf.initializers.RandomUniform(seed=args.seed)

    # Create logdir name
    args.logdir = os.path.join("logs", "{}-{}-{}".format(
        os.path.basename(globals().get("__file__", "notebook")),
        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
        ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value) for key, value in sorted(vars(args).items())))
    ))

    # Load the data
    cifar = CIFAR10(size={"dev": 1000})

    # Create the model
    inputs = tf.keras.layers.Input(shape=[CIFAR10.H, CIFAR10.W, CIFAR10.C])
    hidden = tf.keras.layers.Conv2D(16, 3, 2, "same", activation=tf.nn.relu)(inputs)
    hidden = tf.keras.layers.Conv2D(16, 3, 1, "same", activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Conv2D(24, 3, 2, "same", activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Conv2D(24, 3, 1, "same", activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Conv2D(32, 3, 2, "same", activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Conv2D(32, 3, 1, "same", activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Flatten()(hidden)
    hidden = tf.keras.layers.Dense(200, activation=tf.nn.relu)(hidden)
    outputs = tf.keras.layers.Dense(CIFAR10.LABELS, activation=tf.nn.softmax)(hidden)

    # Train the model
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=tf.optimizers.Adam(),
        loss=tf.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.metrics.SparseCategoricalAccuracy(name="accuracy")],
    )
    tb_callback = tf.keras.callbacks.TensorBoard(args.logdir, histogram_freq=1, update_freq=100, profile_batch=0)

    # TODO: Create data augmenting `tf.keras.preprocessing.image.ImageDataGenerator`.
    # Specify:
    # - rotation range of 20 degrees,
    # - zoom range of 0.2 (20%),
    # - width shift range and height shift range of 0.1 (10%),
    # - allow horizontal flips
    train_generator = ...
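    # One way to fill in the generator described above (a sketch, not the
    # reference solution):
    train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=20,
        zoom_range=0.2,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True)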

    # TODO: Train using the generator. To augment data, use
    # `train_generator.flow` and specify:
    # - first 5000 of cifar.train.data["images"] as inputs
    # - first 5000 of cifar.train.data["labels"] as target
    # - batch_size of args.batch_size
    # - args.seed as random seed
    logs = model.fit(
        ...,
        shuffle=False, epochs=args.epochs,
        validation_data=(cifar.dev.data["images"], cifar.dev.data["labels"]),
        callbacks=[tb_callback],
    )

    # Return dev set accuracy
    return logs.history["val_accuracy"][-1]
Example #10
def main(args: argparse.Namespace) -> Dict[str, float]:
    # Fix random seeds and threads
    tf.keras.utils.set_random_seed(args.seed)
    tf.config.threading.set_inter_op_parallelism_threads(args.threads)
    tf.config.threading.set_intra_op_parallelism_threads(args.threads)

    # Create logdir name
    args.logdir = os.path.join(
        "logs", "{}-{}-{}".format(
            os.path.basename(globals().get("__file__", "notebook")),
            datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), ",".join(
                ("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v)
                 for k, v in sorted(vars(args).items())))))

    # Load the data
    cifar = CIFAR10(size={"train": 5000, "dev": 1000})

    # Create the model
    inputs = tf.keras.layers.Input(shape=[CIFAR10.H, CIFAR10.W, CIFAR10.C])
    hidden = tf.keras.layers.Conv2D(16, 3, 2, "same",
                                    activation=tf.nn.relu)(inputs)
    hidden = tf.keras.layers.Conv2D(16, 3, 1, "same",
                                    activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Conv2D(24, 3, 2, "same",
                                    activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Conv2D(24, 3, 1, "same",
                                    activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Conv2D(32, 3, 2, "same",
                                    activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Conv2D(32, 3, 1, "same",
                                    activation=tf.nn.relu)(hidden)
    hidden = tf.keras.layers.Flatten()(hidden)
    hidden = tf.keras.layers.Dense(200, activation=tf.nn.relu)(hidden)
    outputs = tf.keras.layers.Dense(CIFAR10.LABELS,
                                    activation=tf.nn.softmax)(hidden)

    # Train the model
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=tf.optimizers.Adam(),
        loss=tf.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.metrics.SparseCategoricalAccuracy(name="accuracy")],
    )
    tb_callback = tf.keras.callbacks.TensorBoard(args.logdir)

    # TODO: Create data augmenting `tf.keras.preprocessing.image.ImageDataGenerator`.
    # Specify:
    # - rotation range of 20 degrees,
    # - zoom range of 0.2 (20%),
    # - width shift range and height shift range of 0.1 (10%),
    # - allow horizontal flips
    train_generator = ...

    # TODO: Train using the generator. To augment data, use
    # `train_generator.flow` and specify:
    # - `cifar.train.data["images"]` as inputs
    # - `cifar.train.data["labels"]` as target
    # - batch_size of `args.batch_size`
    # - `args.seed` as the random seed
    logs = model.fit(
        ...,
        shuffle=False,
        epochs=args.epochs,
        validation_data=(cifar.dev.data["images"], cifar.dev.data["labels"]),
        callbacks=[tb_callback],
    )
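    # A sketch (not the reference solution) of how the `...` above could be
    # filled in with `train_generator.flow`, assuming `train_generator` is the
    # ImageDataGenerator described by the earlier TODO:
    # logs = model.fit(
    #     train_generator.flow(cifar.train.data["images"],
    #                          cifar.train.data["labels"],
    #                          batch_size=args.batch_size, seed=args.seed),
    #     shuffle=False, epochs=args.epochs,
    #     validation_data=(cifar.dev.data["images"], cifar.dev.data["labels"]),
    #     callbacks=[tb_callback],
    # )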

    # Return development metrics for ReCodEx to validate
    return {
        metric: values[-1]
        for metric, values in logs.history.items() if metric.startswith("val_")
    }
Example #11
    # Fix random seeds and number of threads
    np.random.seed(42)
    tf.random.set_seed(42)
    if args.recodex:
        tf.keras.utils.get_custom_objects()["glorot_uniform"] = (
            lambda: tf.keras.initializers.glorot_uniform(seed=42))
    tf.config.threading.set_inter_op_parallelism_threads(args.threads)
    tf.config.threading.set_intra_op_parallelism_threads(args.threads)

    # Create logdir name
    args.logdir = os.path.join(
        "logs", "{}-{}-{}".format(
            os.path.basename(__file__),
            datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), ",".join(
                ("{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value)
                 for key, value in sorted(vars(args).items())))))

    # Load the data
    mnist = CIFAR10()

    # Create the network and train
    network = Network(args)
    network.train(mnist, args)

    # Compute test set accuracy and print it
    accuracy = network.test(mnist, args)
    with open("mnist_cnn.out", "w") as out_file:
        print("{:.2f}".format(100 * accuracy), file=out_file)
Example #12
    def __init__(self, args):
        self.args = args

        # Initial Data Transform
        transform_img = [
            transforms.ToTensor(),
        ]

        # Normalisation Transform
        if self.args.pretrained:
            normalise = [
                transforms.Normalize(self.args.imagenet_mean_color,
                                     self.args.imagenet_std_color)
            ]
        else:
            normalise = [
                transforms.Normalize(self.args.cifar10_mean_color,
                                     self.args.cifar10_std_color)
            ]

        # Model Specific Transform
        if self.args.model == "alexnet":
            resize = [transforms.Resize((224, 224))]
        else:
            resize = []

        # Any Data Augmentation
        data_augmentation = []

        if self.args.data_aug:
            if self.args.model == "resnet":
                data_augmentation = [
                    transforms.RandomHorizontalFlip(),
                    transforms.RandomAffine(degrees=0.0,
                                            translate=(0.1, 0.1),
                                            resample=PIL.Image.NEAREST),
                ]
            else:
                data_augmentation = [
                    transforms.RandomCrop(self.args.random_crop_size,
                                          padding=self.args.random_crop_pad),
                    transforms.RandomHorizontalFlip(),
                ]

        # Combine all transformations
        transform_img_train = data_augmentation + resize + transform_img + normalise
        transform_img_test = resize + transform_img + normalise

        self.transform_train = transforms.Compose(transform_img_train)
        self.transform_test = transforms.Compose(transform_img_test)

        # Datasets and DataLoaders
        if self.args.eval is False:
            if self.args.training_mode == "supervised":
                # ########################## Fully Supervised ################################### #
                self.full_supervised_train_dataset = CIFAR10(
                    self.args.cifar10_dir,
                    split="train",
                    download=True,
                    transform=self.transform_train,
                )
                if self.args.full_data:
                    self.train_dataset = self.full_supervised_train_dataset
                else:
                    # ###################### Partially Supervised ############################### #
                    (
                        train_labeled_idxs,
                        train_unlabeled_idxs,
                    ) = get_train_indices_for_ssl(
                        self.full_supervised_train_dataset,
                        self.args.train_data_size)
                    self.train_labeled_indices = train_labeled_idxs

                    self.supervised_train_dataset = CIFAR10(
                        self.args.cifar10_dir,
                        split="train",
                        train_split_supervised_indices=np.array(
                            self.train_labeled_indices),
                        download=True,
                        transform=self.transform_train,
                    )
                    self.train_dataset = self.supervised_train_dataset

                self.train_loader = torch.utils.data.DataLoader(
                    self.train_dataset,
                    batch_size=self.args.batch_size,
                    shuffle=True)

            elif self.args.training_mode in ("semi-supervised", "gmm"):
                # ########################### Semi Supervised ################################### #
                self.full_supervised_train_dataset = CIFAR10(
                    self.args.cifar10_dir,
                    split="train",
                    download=True,
                    transform=self.transform_train,
                )
                train_labeled_idxs, train_unlabeled_idxs = get_train_indices_for_ssl(
                    self.full_supervised_train_dataset,
                    self.args.train_data_size)
                self.train_labeled_indices = train_labeled_idxs
                self.train_unlabeled_indices = train_unlabeled_idxs

                self.supervised_train_dataset = CIFAR10(
                    self.args.cifar10_dir,
                    split="train",
                    train_split_supervised_indices=np.array(
                        self.train_labeled_indices),
                    download=True,
                    transform=self.transform_train,
                )
                self.unsupervised_train_dataset = CIFAR10(
                    self.args.cifar10_dir,
                    split="train",
                    train_split_supervised_indices=np.array(
                        self.train_unlabeled_indices),
                    download=True,
                    transform=self.transform_train,
                )
                self.supervised_train_loader = torch.utils.data.DataLoader(
                    self.supervised_train_dataset,
                    batch_size=self.args.batch_size,
                    shuffle=True,
                )
                self.unsupervised_train_loader = torch.utils.data.DataLoader(
                    self.unsupervised_train_dataset,
                    batch_size=self.args.ssl_label_generation_batch_size,
                    shuffle=True,
                )
            # ############################## Val Split ########################################## #
            self.val_dataset = CIFAR10(
                self.args.cifar10_dir,
                split="val",
                download=True,
                transform=self.transform_test,
            )
            self.val_loader = torch.utils.data.DataLoader(
                self.val_dataset,
                batch_size=self.args.test_batch_size,
                shuffle=True)
        # ################################ Test Split ########################################### #
        self.test_dataset = CIFAR10(
            self.args.cifar10_dir,
            split="test",
            download=True,
            transform=self.transform_test,
        )
        self.test_loader = torch.utils.data.DataLoader(
            self.test_dataset,
            batch_size=self.args.test_batch_size,
            shuffle=True)
Example #13
    def ssl_init_epoch(self, predictions_indices=[], predictions_labels=[]):

        self.train_labeled_indices.extend(predictions_indices)
        self.supervised_train_dataset = CIFAR10(
            self.args.cifar10_dir,
            split="train",
            train_split_supervised_indices=np.array(
                self.train_labeled_indices),
            download=True,
            transform=self.transform_train,
        )
        if len(predictions_indices) != 0:
            indices2array_indices = {
                idx: i
                for i, idx in enumerate(
                    self.supervised_train_dataset.train_indices)
            }
            array_indices = np.array([
                indices2array_indices[index] for index in predictions_indices
            ])
            self.supervised_train_dataset.train_labels[
                array_indices] = predictions_labels

            self.train_unlabeled_indices = [
                idx for idx in self.train_unlabeled_indices
                if idx not in predictions_indices
            ]

        assert len(self.train_labeled_indices) + len(
            self.train_unlabeled_indices) == len(
                self.full_supervised_train_dataset.train_labels)

        self.supervised_train_loader = torch.utils.data.DataLoader(
            self.supervised_train_dataset,
            batch_size=self.args.batch_size,
            shuffle=True)

        if len(self.train_unlabeled_indices) != 0:
            self.unsupervised_train_dataset = CIFAR10(
                self.args.cifar10_dir,
                split="train",
                train_split_supervised_indices=np.array(
                    self.train_unlabeled_indices),
                download=True,
                transform=self.transform_train,
            )
            self.unsupervised_train_loader = torch.utils.data.DataLoader(
                self.unsupervised_train_dataset,
                batch_size=self.args.ssl_label_generation_batch_size,
                shuffle=True,
            )
        else:
            self.stop_label_generation = True

        self.train_loader = self.supervised_train_loader

        print("Labeled Training data: %d/%d" % (
            len(self.supervised_train_dataset.train_labels),
            len(self.full_supervised_train_dataset.train_labels),
        ))
        print("Unlabeled Training data: %d/%d" % (
            len(self.train_unlabeled_indices),
            len(self.full_supervised_train_dataset.train_labels),
        ))
Example #14
from ngraph.frontends.neon import Affine, Preprocess, Convolution, Pool2D, Sequential
from ngraph.frontends.neon import UniformInit, Rectlin, Softmax, GradientDescentMomentum
from ngraph.frontends.neon import ax, loop_train, make_bound_computation, make_default_callbacks
from ngraph.frontends.neon import NgraphArgparser
from ngraph.frontends.neon import ArrayIterator

from cifar10 import CIFAR10
import ngraph.transformers as ngt

parser = NgraphArgparser(description='Train simple CNN on cifar10 dataset')
args = parser.parse_args()

np.random.seed(args.rng_seed)

# Create the dataloader
train_data, valid_data = CIFAR10(args.data_dir).load_data()
train_set = ArrayIterator(train_data,
                          args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(valid_data, args.batch_size)
######################
# Model specification


def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(axes=x.axes[0],
                                    initial_value=np.array([[104., 119.,
                                                             127.]]))
    y = ng.expand_dims((x - bgr_mean) / 255., ax.D, 1)
    return y
Example #15
# classifier
weights = tf.Variable(tf.zeros([9216, n_classes]), name="output_weight")
bias = tf.Variable(tf.truncated_normal([n_classes]), name="output_bias")
model = tf.matmul(extractor, weights) + bias
outputs = tf.placeholder(tf.float32, [None, n_classes])

# ====================
# config dataset
# ====================
print('Prepare dataset')
if data_name == 'ucf101':
    train_dataset = UCF101('rgb', 'trainlist01', batch_size=batch_size)
    test_dataset = UCF101('rgb', 'testlist01b', batch_size=batch_size)
elif data_name == 'cifar10':
    train_dataset = CIFAR10(split_name='train', batch_size=batch_size)
    test_dataset = CIFAR10(split_name='val', batch_size=batch_size)

init = tf.initialize_all_variables()

train_features = None
train_label = None
with tf.Session(config=config) as sess:
    sess.run(init)
    for i in tqdm(range(20), unit=" batch "):
        this_batch = train_dataset.batch(i)
        train_X, train_y = helper.reshape_batch(this_batch, (image_size, image_size), n_classes)
        train_y = [np.argmax(element) for element in train_y]
        features = sess.run(
            [extractor],
            feed_dict={
Example #16
    args = parser.parse_args()

    # Fix random seeds
    np.random.seed(42)
    tf.random.set_seed(42)
    tf.config.threading.set_inter_op_parallelism_threads(args.threads)
    tf.config.threading.set_intra_op_parallelism_threads(args.threads)

    # Create logdir name
    args.logdir = os.path.join(
        "logs", "{}-{}-{}".format(
            os.path.basename(__file__),
            datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), ",".join(
                ("{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value)
                 for key, value in sorted(vars(args).items())))))

    # Load data
    cifar = CIFAR10()

    # Create the network and train
    network = Network(args)
    network.train(cifar, args)

    # Generate test set annotations, but in args.logdir to allow parallel execution.
    with open(os.path.join(args.logdir, "cifar_competition_test.txt"),
              "w",
              encoding="utf-8") as out_file:
        for probs in network.predict(cifar.test.data["images"],
                                     batch_size=args.batch_size):
            print(np.argmax(probs), file=out_file)
#%%
from cifar10 import CIFAR10
c = CIFAR10()
c.data_augmentation(10000)

#%%
from scipy import io as io
import numpy as np
c.y_train = np.argmax(c.y_train, axis=1).astype("int32")
c.y_test = np.argmax(c.y_test, axis=1).astype("int32")
io.savemat("train.mat", {"x": c.x_train, "y": c.y_train})
io.savemat("test.mat", {"x": c.x_test, "y": c.y_test})