Example #1
def main(run, data_path, output_path, log_path, layer_width, batch_size,
         epochs, learning_rate):
    info('Data')
    file = download(data_path)

    with np.load(file) as f:
        x_train, y_train = f['x_train'], f['y_train']
        x_test, y_test = f['x_test'], f['y_test']

    print('Drawing samples...')
    draw_samples(run, log_path, 'training_set', x_train, y_train)
    draw_samples(run, log_path, 'test_set', x_test, y_test)
    print('Done!')

    train_set = x_train.reshape((len(x_train), -1)) / 255.

    info('Training')

    # function shape
    model = keras.Sequential([
        keras.layers.Reshape((28, 28, 1)),
        keras.layers.Conv2D(32, (3, 3), activation='relu'),
        keras.layers.Conv2D(64, (3, 3), activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Dropout(0.25),
        keras.layers.Flatten(),
        keras.layers.Dense(layer_width, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(10, activation='softmax')
    ])

    # how to optimize the function
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # callbacks
    logaml = AMLCallback(run)
    filename = datetime.now().strftime("%d.%b.%Y.%H.%M")
    checkpoint = ModelCheckpoint(
        os.path.join(output_path,
                     filename + '.e{epoch:02d}-{accuracy:.2f}.hdf5'))

    model.fit(train_set,
              y_train,
              epochs=epochs,
              batch_size=batch_size,
              callbacks=[logaml, checkpoint])

    model.summary()

    # also save the latest model alongside the timestamped checkpoints
    file_output = os.path.join(output_path, 'latest.hdf5')
    print('Serializing h5 model to:\n{}'.format(file_output))
    model.save(file_output)

    info('Test')
    test_set = x_test.reshape((len(x_test), -1)) / 255.
    test_loss, test_acc = model.evaluate(test_set, y_test)
    print('\nTest accuracy:', test_acc)
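All of these examples are main functions lifted from larger training scripts, so the imports and project helpers (info, download, draw_samples, split, parse_record, AMLCallback) are not shown. Below is a minimal sketch of what the MNIST examples assume, with hypothetical stand-in implementations; the real project's helpers will differ.

# imports the MNIST examples above rely on (assumed, not shown in the snippets)
import os
import urllib.request
from datetime import datetime

import numpy as np
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint


def info(title):
    # hypothetical helper: print a banner separating pipeline stages
    print('\n=== {} ==='.format(title))


def download(data_path):
    # hypothetical helper: return data_path if it is already a local file,
    # otherwise fetch it into the working directory and return the local path
    if os.path.exists(data_path):
        return data_path
    local = os.path.basename(data_path)
    urllib.request.urlretrieve(data_path, local)
    return local


class AMLCallback(keras.callbacks.Callback):
    # hypothetical helper: mirror per-epoch Keras metrics into an Azure ML run
    def __init__(self, run):
        super().__init__()
        self.run = run

    def on_epoch_end(self, epoch, logs=None):
        for name, value in (logs or {}).items():
            self.run.log(name, value)

(draw_samples, which presumably renders a grid of sample digits under log_path, is omitted here; the run object itself is an Azure ML Run.)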
Example #2
def main(
    run,
    data_path,
    output_path,
    log_path,
    layer_width,
    batch_size,
    epochs,
    learning_rate,
):
    info("Data")
    file = download(data_path)

    with np.load(file) as f:
        x_train, y_train = f["x_train"], f["y_train"]
        x_test, y_test = f["x_test"], f["y_test"]

    print("Drawing samples...")
    draw_samples(run, log_path, "training_set", x_train, y_train)
    draw_samples(run, log_path, "test_set", x_test, y_test)
    print("Done!")

    train_set = x_train.reshape((len(x_train), -1)) / 255.0

    info("Training")

    # function shape
    model = keras.Sequential([
        keras.layers.Dense(layer_width, activation="relu"),
        keras.layers.Dense(10, activation="softmax"),
    ])

    # how to optimize the function
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )

    # callbacks
    logaml = AMLCallback(run)
    filename = datetime.now().strftime("%d.%b.%Y.%H.%M")
    checkpoint = ModelCheckpoint(
        os.path.join(output_path,
                     filename + ".e{epoch:02d}-{accuracy:.2f}.hdf5"))

    model.fit(
        train_set,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[logaml, checkpoint],
    )

    info("Test")
    test_set = x_test.reshape((len(x_test), -1)) / 255.0
    test_loss, test_acc = model.evaluate(test_set, y_test)
    print("\nTest accuracy:", test_acc)
Example #3
def main(run, data_path, output_path, log_path, layer_width, batch_size,
         epochs, learning_rate):
    info('Data')
    file = download(data_path)

    with np.load(file) as f:
        x_train, y_train = f['x_train'], f['y_train']
        x_test, y_test = f['x_test'], f['y_test']

    print('Drawing samples...')
    draw_samples(run, log_path, 'training_set', x_train, y_train)
    draw_samples(run, log_path, 'test_set', x_test, y_test)
    print('Done!')

    train_set = x_train.reshape((len(x_train), -1)) / 255.

    info('Training')

    # function shape
    model = keras.Sequential([
        keras.layers.Dense(layer_width, activation='relu'),
        keras.layers.Dense(10, activation='softmax')
    ])

    # how to optimize the function
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # callbacks
    logaml = AMLCallback(run)
    filename = datetime.now().strftime("%d.%b.%Y.%H.%M")
    checkpoint = ModelCheckpoint(
        os.path.join(output_path,
                     filename + '.e{epoch:02d}-{accuracy:.2f}.hdf5'))

    model.fit(train_set,
              y_train,
              epochs=epochs,
              batch_size=batch_size,
              callbacks=[logaml, checkpoint])

    info('Test')
    test_set = x_test.reshape((len(x_test), -1)) / 255.
    test_loss, test_acc = model.evaluate(test_set, y_test)
    print('\nTest accuracy:', test_acc)
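The checkpoint path in these examples uses Keras's filename templating: ModelCheckpoint fills any {...} fields in filepath from the (1-based) epoch number and the logged metrics at the end of each epoch. Roughly what that resolution looks like for the pattern above, using an illustrative timestamp:

# illustrative only: a run started 12 Mar 2021 at 14:05, epoch 3, accuracy 0.97
filepath = '12.Mar.2021.14.05' + '.e{epoch:02d}-{accuracy:.2f}.hdf5'
print(filepath.format(epoch=3, accuracy=0.97))
# -> 12.Mar.2021.14.05.e03-0.97.hdf5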
Example #4
def main(run, source_path, target_path, epochs, batch, lr):
    info('Preprocess')

    print(f'Using Tensorflow v.{tf.__version__}')
    print(
        f'GPUs Available: {len(tf.config.experimental.list_physical_devices("GPU"))}'
    )

    if not os.path.exists(target_path):
        os.makedirs(target_path)

    # load tfrecord metadata
    prep_step = os.path.join(source_path, 'metadata.json')
    with open(prep_step) as f:
        prep = json.load(f)

    for i in prep:
        print('{} => {}'.format(i, prep[i]))

    labels = prep['categories']
    img_shape = (prep['image_size'], prep['image_size'], 3)
    record_sz = prep['records']

    records = os.path.join(source_path, prep['file'])
    print('Loading {}'.format(records))
    with open(records, 'r') as f:
        filenames = [
            os.path.join(source_path, s.strip()) for s in f.readlines()
        ]

    print('Splitting data:')
    train, test = split(filenames)
    print('  Train: {}'.format(len(train)))
    print('   Test: {}'.format(len(test)))

    print('Creating training dataset')
    train_ds = tf.data.TFRecordDataset(train)
    train_ds = train_ds.map(map_func=parse_record, num_parallel_calls=5)
    train_ds = train_ds.shuffle(buffer_size=10000)
    train_ds = train_ds.batch(batch)
    train_ds = train_ds.prefetch(buffer_size=5)
    train_ds = train_ds.repeat(epochs)

    # model
    info('Creating Model')
    base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
                                                   include_top=False,
                                                   weights='imagenet',
                                                   pooling='avg')

    base_model.trainable = True

    model = tf.keras.Sequential(
        [base_model,
         tf.keras.layers.Dense(len(labels), activation='softmax')])

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    # training
    info('Training')

    # callbacks
    logaml = AMLCallback(run)
    filename = datetime.now().strftime("%d.%b.%Y.%H.%M")
    checkpoint = ModelCheckpoint(os.path.join(
        target_path,
        filename + '.e{epoch:02d}-{accuracy:.2f}-v{val_accuracy:.2f}.hdf5'),
                                 monitor='val_accuracy',
                                 save_best_only=True)

    # using both test and val in this case
    test_ds = tf.data.TFRecordDataset(test).map(parse_record).batch(batch)
    test_steps = math.ceil((len(test) * record_sz) / batch)

    steps_per_epoch = math.ceil((len(train) * record_sz) / batch)
    history = model.fit(train_ds,
                        epochs=epochs,
                        steps_per_epoch=steps_per_epoch,
                        callbacks=[logaml, checkpoint],
                        validation_data=test_ds,
                        validation_steps=test_steps)

    info('Writing metadata')
    out_file = os.path.join(target_path, 'metadata.json')
    output = {
        'image_size': prep['image_size'],
        'categories': prep['categories'],
        'index': prep['index'],
        'generated': datetime.now().strftime('%m/%d/%y %H:%M:%S'),
        'run': str(run.id)
    }

    print('Writing out metadata to {}'.format(out_file))
    with open(str(out_file), 'w') as f:
        json.dump(output, f)
    print('Done!')
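The steps_per_epoch arithmetic here assumes prep['records'] is the number of examples per TFRecord shard, so the total example count is shards times records-per-shard, and Keras needs ceil(total / batch) batches to cover each example once per epoch. A quick sanity check under that assumption:

import math

# hypothetical numbers: 40 training shards, 500 records per shard, batch of 32
steps_per_epoch = math.ceil((40 * 500) / 32)
print(steps_per_epoch)  # ceil(20000 / 32) = 625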
Example #5
def main(run, data_path, output_path, target_output, epochs, batch, lr):
    info('Preprocess')
    # load tfrecord metadata
    prep_step = os.path.join(output_path, 'prep.json')
    with open(prep_step) as f:
        prep = json.load(f)

    for i in prep:
        print('{} => {}'.format(i, prep[i]))

    labels = prep['categories']
    img_shape = (prep['image_size'], prep['image_size'], 3)
    record_sz = prep['records']

    records = os.path.join(data_path, prep['file'])
    print('Loading {}'.format(records))
    with open(records, 'r') as f:
        filenames = [os.path.join(data_path, s.strip()) for s in f.readlines()]
    
    print('Splitting data:')
    train, test, val = split(filenames)
    print('  Train: {}'.format(len(train)))
    print('   Test: {}'.format(len(test)))
    print('    Val: {}'.format(len(val)))

    print('Creating training dataset')
    train_ds = tf.data.TFRecordDataset(train)
    train_ds = train_ds.map(map_func=parse_record, num_parallel_calls=5)
    train_ds = train_ds.shuffle(buffer_size=10000)
    train_ds = train_ds.batch(batch)
    train_ds = train_ds.prefetch(buffer_size=5)
    train_ds = train_ds.repeat(epochs)

    # model
    info('Creating Model')
    #base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
    #                                           include_top=False, 
    #                                           weights='imagenet',
    #                                           pooling='avg')

    base_model = tf.keras.applications.VGG19(input_shape=img_shape,
                                             include_top=False,
                                             weights='imagenet',
                                             pooling='avg')

    base_model.trainable = True

    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.Dense(len(labels), activation='softmax')
    ])

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()
    
    # training
    info('Training')

    # callbacks
    logaml = AMLCallback(run)
    filename = datetime.now().strftime("%d.%b.%Y.%H.%M")
    model_path = os.path.join(target_output, 'model')
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    checkpoint = ModelCheckpoint(
        os.path.join(model_path,
                     filename + '.e{epoch:02d}-{accuracy:.2f}.hdf5'))

    test_ds = tf.data.TFRecordDataset(test).map(parse_record).batch(batch)
    test_steps = math.ceil((len(test)*record_sz)/batch)

    steps_per_epoch = math.ceil((len(train)*record_sz)/batch)
    history = model.fit(train_ds,
                        epochs=epochs,
                        steps_per_epoch=steps_per_epoch,
                        callbacks=[logaml, checkpoint],
                        validation_data=test_ds,
                        validation_steps=test_steps)

    print('Done!')
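split is project code not shown on this page; Example #4 expects it to return two shard lists, while this example expects three. A minimal sketch of the three-way variant, assuming a deterministic shuffle and an 80/10/10 cut over the shard filenames (the project's actual ratios may differ):

import random


def split(filenames, train_frac=0.8, test_frac=0.1, seed=42):
    # hypothetical: shuffle the shard list deterministically, then cut it
    # into train / test / val pieces; whatever remains after the train and
    # test slices becomes the validation set
    files = list(filenames)
    random.Random(seed).shuffle(files)
    n_train = int(len(files) * train_frac)
    n_test = int(len(files) * test_frac)
    train = files[:n_train]
    test = files[n_train:n_train + n_test]
    val = files[n_train + n_test:]
    return train, test, val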
Example #6
def main(
    run,
    data_path,
    output_path,
    log_path,
    layer_width,
    batch_size,
    epochs,
    learning_rate,
):
    info("Data")
    file = download(data_path)

    with np.load(file) as f:
        x_train, y_train = f["x_train"], f["y_train"]
        x_test, y_test = f["x_test"], f["y_test"]

    print("Drawing samples...")
    draw_samples(run, log_path, "training_set", x_train, y_train)
    draw_samples(run, log_path, "test_set", x_test, y_test)
    print("Done!")

    train_set = x_train.reshape((len(x_train), -1)) / 255.0

    info("Training")

    # function shape
    model = keras.Sequential([
        keras.layers.Reshape((28, 28, 1)),
        keras.layers.Conv2D(32, (3, 3), activation="relu"),
        keras.layers.Conv2D(64, (3, 3), activation="relu"),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Dropout(0.25),
        keras.layers.Flatten(),
        keras.layers.Dense(layer_width, activation="relu"),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(10, activation="softmax"),
    ])

    # how to optimize the function
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )

    # callbacks
    logaml = AMLCallback(run)
    filename = datetime.now().strftime("%d.%b.%Y.%H.%M")
    checkpoint = ModelCheckpoint(
        os.path.join(output_path,
                     filename + ".e{epoch:02d}-{accuracy:.2f}.hdf5"))

    model.fit(
        train_set,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[logaml, checkpoint],
    )

    model.summary()

    # also save the latest model alongside the timestamped checkpoints
    file_output = os.path.join(output_path, "latest.hdf5")
    print("Serializing h5 model to:\n{}".format(file_output))
    model.save(file_output)

    info("Test")
    test_set = x_test.reshape((len(x_test), -1)) / 255.0
    test_loss, test_acc = model.evaluate(test_set, y_test)
    print("\nTest accuracy:", test_acc)
Example #7
def main(run, source_path, target_path, epochs, batch, lr):
    info('Preprocess')
    if not os.path.exists(target_path):
        os.makedirs(target_path)

    # load tfrecord metadata
    prep_step = os.path.join(source_path, 'metadata.json')
    with open(prep_step) as f:
        prep = json.load(f)

    for i in prep:
        print('{} => {}'.format(i, prep[i]))

    if not run.id.startswith('OfflineRun'):
        run.log('total_records', prep['total_records'])

    labels = prep['categories']
    img_shape = (prep['image_size'], prep['image_size'], 3)
    record_sz = prep['records']

    records = os.path.join(source_path, prep['file'])
    print('Loading {}'.format(records))
    with open(records, 'r') as f:
        filenames = [
            os.path.join(source_path, s.strip()) for s in f.readlines()
        ]

    print('Splitting data:')
    train, test = split(filenames)
    print('  Train: {}'.format(len(train)))
    print('   Test: {}'.format(len(test)))

    print('Creating training dataset')
    train_ds = tf.data.TFRecordDataset(train)
    train_ds = train_ds.map(map_func=parse_record, num_parallel_calls=5)
    train_ds = train_ds.shuffle(buffer_size=10000)
    train_ds = train_ds.batch(batch)
    train_ds = train_ds.prefetch(buffer_size=5)
    train_ds = train_ds.repeat(epochs)

    # model
    info('Creating Model')
    # base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
    #                                            include_top=False,
    #                                            weights='imagenet',
    #                                            pooling='avg')

    base_model = tf.keras.applications.VGG19(input_shape=img_shape,
                                             include_top=False,
                                             weights='imagenet',
                                             pooling='avg')

    #base_model = tf.keras.applications.ResNet50(input_shape=img_shape,
    #                                           include_top=False,
    #                                           weights='imagenet',
    #                                           pooling='avg')

    base_model.trainable = True

    # # Transfer learning to boost the model's accuracy
    # # UnFreeze the following layers
    # trainableLayers = ['global_average_pooling2d','block5_pool','block5_conv4']
    # # Layer names from:
    # # https://www.tensorflow.org/tutorials/generative/style_transfer
    # for layer in base_model.layers:
    #      print("Detected layer " + layer.name)
    #      if layer.name in trainableLayers:
    #          print(" - Trainable " + layer.name)
    #          layer.trainable = True
    #      else:
    #          layer.trainable = False

    model = tf.keras.Sequential(
        [base_model,
         tf.keras.layers.Dense(len(labels), activation='softmax')])

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    # training
    info('Training')

    # callbacks
    logaml = AMLCallback(run)
    filename = datetime.now().strftime("%d.%b.%Y.%H.%M")
    checkpoint = ModelCheckpoint(os.path.join(
        target_path,
        filename + '.e{epoch:02d}-{accuracy:.2f}-v{val_accuracy:.2f}.hdf5'),
                                 monitor='val_accuracy',
                                 save_best_only=True)

    # using both test and val in this case
    test_ds = tf.data.TFRecordDataset(test).map(parse_record).batch(batch)
    test_steps = math.ceil((len(test) * record_sz) / batch)

    steps_per_epoch = math.ceil((len(train) * record_sz) / batch)
    history = model.fit(train_ds,
                        epochs=epochs,
                        steps_per_epoch=steps_per_epoch,
                        callbacks=[logaml, checkpoint],
                        validation_data=test_ds,
                        validation_steps=test_steps)

    info('Writing metadata')
    out_file = os.path.join(target_path, 'metadata.json')
    output = {
        'image_size': prep['image_size'],
        'categories': prep['categories'],
        'index': prep['index'],
        'generated': datetime.now().strftime('%m/%d/%y %H:%M:%S'),
    }

    # If we are online
    if not run.id.startswith('OfflineRun'):
        # Log history
        for i in history.history:
            # log only the final epoch's value for each metric
            run.log(i, history.history[i][-1])

    print('Writing out metadata to {}'.format(out_file))
    with open(str(out_file), 'w') as f:
        json.dump(output, f)
    print('Done!')
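The commented-out block above sketches selective fine-tuning: freeze the pretrained base except for a few named layers. Below is a runnable version of that idea against VGG19, using layer names that model actually exposes; note that of the three names only block5_conv4 carries weights, so it is the only layer that actually trains. The input shape is an assumed placeholder for img_shape.

import tensorflow as tf

base_model = tf.keras.applications.VGG19(input_shape=(224, 224, 3),
                                         include_top=False,
                                         weights='imagenet',
                                         pooling='avg')

# unfreeze only the tail of the network; keep every earlier layer frozen
trainable_layers = {'block5_conv4', 'block5_pool', 'global_average_pooling2d'}
for layer in base_model.layers:
    layer.trainable = layer.name in trainable_layers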