def test(run, batch_size, competition, dataset):
    """Predict the test set with a saved run's model and optionally submit.

    Reads ``params.yaml`` and ``model.h5`` from the directory returned by
    ``DataSet.path_for_run(run)``, predicts every batch found under the data
    set's ``test_path`` and writes the result to ``submission.csv`` in the
    same run directory.

    Args:
        run: Run identifier handed to ``DataSet.path_for_run``.
        batch_size: Training batch size; prediction uses ``batch_size * 2``.
        competition: Competition name forwarded to ``kaggle.submit``. When
            ``None`` the CSV is written but nothing is submitted.
        dataset: Data set name used to build ``config.DataSet``.
    """
    # data set paths
    dset = config.DataSet(dataset)
    with open(dset.path_for_run(run) + 'params.yaml') as f:
        # safe_load instead of yaml.load: the file only needs to yield a plain
        # mapping with a 'learning_rate' entry, and yaml.load() without an
        # explicit Loader is unsafe on old PyYAML and a TypeError on
        # PyYAML >= 6.
        params = yaml.safe_load(f)

    # load model
    import keras.models
    import keras.optimizers  # explicit: keras.optimizers.Adam is used below
    model = keras.models.load_model(dset.path_for_run(run) + 'model.h5')
    model.compile(
        optimizer=keras.optimizers.Adam(lr=params['learning_rate']),
        loss='categorical_crossentropy',
        metrics=['accuracy'])

    # load test batches (unshuffled, no class labels)
    from fastai.vgg16 import Vgg16
    test_batches = Vgg16.get_batches(
        dset.test_path,
        shuffle=False,
        batch_size=batch_size * 2,
        class_mode=None)

    # predict
    preds = model.predict_generator(test_batches, test_batches.nb_sample)

    # format dataframe
    df = submission_df(test_batches, preds)
    # Keep labels inside [0.05, 0.95].
    # NOTE(review): presumably to bound the penalty for confident wrong
    # predictions under a log-loss metric -- confirm.
    df.label = df.label.clip(0.05, 0.95)
    df.to_csv(dset.path_for_run(run) + 'submission.csv', index=True)

    # submit only when a competition was given
    if competition is not None:
        kaggle.submit(run, competition, dataset)
def train(epochs, batch_size, learning_rate, num_trainable, dropout, dataset):
    """Fine-tune a VGG16 model on ``dataset`` and store the run's artifacts.

    The following files are written under the data set's ``run_path``:

    * ``params.yaml``   -- the hyper-parameters this function was called with
    * ``train_log.csv`` -- the Keras ``CSVLogger`` output (append mode)
    * ``model.h5``      -- the model saved after training
    * ``validate.csv``  -- predictions for the validation set

    One epoch is always run right after ``vgg.finetune``. When ``epochs > 1``
    the remaining ``epochs - 1`` epochs run after
    ``vgg.trainable_layers(num_trainable)`` has been called.
    """
    data = config.DataSet(dataset)
    utils.mkdir_p(data.run_path)

    # Record the hyper-parameters next to the other run artifacts.
    hyper_params = {
        'epochs': epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'trainable': num_trainable,
        'dropout': dropout,
        'dataset': dataset,
    }
    with open(data.run_path + 'params.yaml', 'w') as params_file:
        params_file.write(yaml.dump(hyper_params))

    # Imports are done here rather than at module level.
    # K is not referenced anywhere in this function.
    from keras import backend as K
    from keras.callbacks import CSVLogger
    from fastai.vgg16 import Vgg16

    net = Vgg16(dropout=dropout)

    # Validation batches are twice the size of the training batches.
    train_batches = net.get_batches(data.train_path, batch_size=batch_size)
    valid_batches = net.get_batches(
        data.validate_path, batch_size=batch_size * 2)

    # Prepare the network for this data set and set up the optimizer.
    net.finetune(train_batches)
    net.compile(learning_rate)

    # Both fit calls share one logger; append=True keeps them in one file.
    callbacks = [CSVLogger(data.run_path + 'train_log.csv', append=True)]
    net.fit(train_batches, valid_batches, nb_epoch=1, callbacks=callbacks)
    if epochs > 1:
        net.trainable_layers(num_trainable)
        net.fit(
            train_batches,
            valid_batches,
            nb_epoch=epochs - 1,
            callbacks=callbacks)

    # Persist the trained model.
    net.model.save(data.run_path + 'model.h5')

    # Predict the validation set and store the predictions.
    predicted_batches, predictions = net.test(
        data.validate_path, batch_size=batch_size * 2)
    validation_frame = test_df(predicted_batches, predictions)
    validation_frame.to_csv(data.run_path + 'validate.csv', index=True)
def submit(run, competition, dataset):
    """Submit a run's ``submission.csv`` to Kaggle through the ``kg`` CLI.

    Args:
        run: Run identifier handed to ``DataSet.path_for_run``; ``-1`` is
            replaced by ``None`` before the lookup.
        competition: Competition name passed to ``kg -c``. Falls back to
            ``dataset`` when ``None``.
        dataset: Data set name used to build ``config.DataSet``.

    Returns:
        The exit status of the ``kg`` process as reported by
        ``subprocess.call``, so callers can tell whether the command failed.

    Raises:
        KeyError: If ``KAGGLE_USERNAME`` or ``KAGGLE_PASSWORD`` is missing
            from the environment.
    """
    if competition is None:
        competition = dataset

    # data set paths
    dataset_obj = config.DataSet(dataset)
    if run == -1:
        run = None
    run_path = dataset_obj.path_for_run(run)

    # NOTE(review): the password travels as a command-line argument, which
    # may be visible to other local users in the process list -- confirm this
    # is acceptable for the environments this runs in.
    cmd = [
        'kg', 'submit',
        '-u', os.environ['KAGGLE_USERNAME'],
        '-p', os.environ['KAGGLE_PASSWORD'],
        '-c', competition,
        run_path + 'submission.csv',
    ]
    # Hand the exit status back instead of dropping it, so a failed
    # submission is no longer indistinguishable from a successful one.
    return subprocess.call(cmd)