def test(run, batch_size, competition, dataset):
    """Predict the test set with a saved run's model and write a submission.

    Reads ``params.yaml`` and ``model.h5`` from the directory returned by
    ``config.DataSet(dataset).path_for_run(run)``, recompiles the model with
    the stored learning rate, runs prediction over the data set's test path
    and writes the clipped predictions to ``submission.csv`` in that same
    directory.  If ``competition`` is not ``None`` the run is then passed on
    to ``kaggle.submit``.

    Args:
        run: Run identifier, forwarded to ``DataSet.path_for_run``.
        batch_size: Base batch size; prediction batches use twice this value.
        competition: Competition name forwarded to ``kaggle.submit``, or
            ``None`` to stop after writing the CSV.
        dataset: Data set name, forwarded to ``config.DataSet``.
    """
    # data set paths
    dset = config.DataSet(dataset)
    # Resolve the run directory once; every artifact below lives in it.
    run_path = dset.path_for_run(run)

    # safe_load instead of a bare yaml.load(f): the latter is deprecated
    # without an explicit Loader (and rejected by PyYAML >= 6) and can build
    # arbitrary Python objects.  NOTE(review): assumes params.yaml holds only
    # plain scalars/mappings -- confirm against whatever writes it.
    with open(run_path + 'params.yaml') as f:
        params = yaml.safe_load(f)

    # load model (imported lazily, inside the function)
    import keras.models
    import keras.optimizers  # used below; do not rely on it being pulled in implicitly
    model = keras.models.load_model(run_path + 'model.h5')
    model.compile(optimizer=keras.optimizers.Adam(lr=params['learning_rate']),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # load test batches: unshuffled and without labels (class_mode=None)
    from fastai.vgg16 import Vgg16
    test_batches = Vgg16.get_batches(dset.test_path,
                                     shuffle=False,
                                     batch_size=batch_size * 2,
                                     class_mode=None)

    # predict, passing the generator's nb_sample as the sample count
    preds = model.predict_generator(test_batches, test_batches.nb_sample)

    # format dataframe; labels are clamped into [0.05, 0.95] before saving
    df = submission_df(test_batches, preds)
    df.label = df.label.clip(0.05, 0.95)
    df.to_csv(run_path + 'submission.csv', index=True)

    # submit only when a competition was given
    if competition is not None:
        kaggle.submit(run, competition, dataset)
Example #2
0
def train(epochs, batch_size, learning_rate, num_trainable, dropout, dataset):
    """Fine-tune a ``Vgg16`` network on a data set and save the run's outputs.

    Everything is written under ``config.DataSet(dataset).run_path``:
    ``params.yaml`` (the arguments of this call), ``train_log.csv`` (Keras
    ``CSVLogger`` output), ``model.h5`` (the fitted model) and
    ``validate.csv`` (predictions on the validation path).

    Training runs one epoch first; when ``epochs`` is greater than 1,
    ``vgg.trainable_layers(num_trainable)`` is called and the remaining
    ``epochs - 1`` epochs are fitted.

    Args:
        epochs: Total number of epochs to train.
        batch_size: Training batch size; validation and prediction batches
            use twice this value.
        learning_rate: Value passed to ``Vgg16.compile``.
        num_trainable: Value passed to ``Vgg16.trainable_layers`` before the
            second fitting phase.
        dropout: Value passed to the ``Vgg16`` constructor.
        dataset: Data set name, forwarded to ``config.DataSet``.
    """
    data_set = config.DataSet(dataset)
    utils.mkdir_p(data_set.run_path)

    # Persist the hyper-parameters of this run alongside its other outputs.
    hyper_params = {
        'epochs': epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'trainable': num_trainable,
        'dropout': dropout,
        'dataset': dataset,
    }
    with open(data_set.run_path + 'params.yaml', 'w') as params_file:
        params_file.write(yaml.dump(hyper_params))

    # Imported lazily, inside the function.
    from keras import backend as K  # not referenced below; kept as in the original
    from keras.callbacks import CSVLogger
    from fastai.vgg16 import Vgg16
    network = Vgg16(dropout=dropout)

    # Batch generators for the training and validation folders.
    train_batches = network.get_batches(data_set.train_path,
                                        batch_size=batch_size)
    valid_batches = network.get_batches(data_set.validate_path,
                                        batch_size=batch_size * 2)

    # Adapt the network to these batches and set up the optimizer.
    network.finetune(train_batches)
    network.compile(learning_rate)

    # One logger shared by both phases; append=True is passed so the second
    # fit adds to train_log.csv.
    history_logger = CSVLogger(data_set.run_path + 'train_log.csv',
                               append=True)

    # Phase 1: a single epoch.
    network.fit(train_batches,
                valid_batches,
                nb_epoch=1,
                callbacks=[history_logger])

    # Phase 2: the remaining epochs, after adjusting the trainable layers.
    if epochs > 1:
        network.trainable_layers(num_trainable)
        network.fit(train_batches,
                    valid_batches,
                    nb_epoch=epochs - 1,
                    callbacks=[history_logger])

    # Save the fitted model.
    network.model.save(data_set.run_path + 'model.h5')

    # Predict the validation set and store the result.
    predicted_batches, predictions = network.test(data_set.validate_path,
                                                  batch_size=batch_size * 2)
    validation_frame = test_df(predicted_batches, predictions)
    validation_frame.to_csv(data_set.run_path + 'validate.csv', index=True)
Example #3
0
def submit(run, competition, dataset):
    """Upload a run's ``submission.csv`` through the ``kg`` command-line tool.

    Args:
        run: Run identifier; ``-1`` is translated to ``None`` before being
            passed to ``DataSet.path_for_run``.
        competition: Competition name given to ``kg submit -c``.  When
            ``None``, the data set name is used instead.
        dataset: Data set name, forwarded to ``config.DataSet``.

    Raises:
        KeyError: If ``KAGGLE_USERNAME`` or ``KAGGLE_PASSWORD`` is missing
            from the environment.
    """
    # Fall back to the data set name when no competition is given.
    competition = dataset if competition is None else competition

    # Locate the directory of the requested run.
    data_set = config.DataSet(dataset)
    run_dir = data_set.path_for_run(None if run == -1 else run)

    # Credentials come from the environment.
    username = os.environ['KAGGLE_USERNAME']
    password = os.environ['KAGGLE_PASSWORD']

    # The exit status returned by subprocess.call is not inspected.
    subprocess.call([
        'kg', 'submit',
        '-u', username,
        '-p', password,
        '-c', competition,
        run_dir + 'submission.csv',
    ])