Ejemplo n.º 1
0
def main():
    """Score the ten trained rounds on the regular and the feature-importance test set.

    For every round 1..10 the model is rebuilt, the weights stored under
    ``<logdir>/<expname>/checkpoints/round-<i>/best_checkpoint`` are loaded and
    predictions are made for two datasets derived from the same test records:
    the one returned by ``convert_dataset`` and the one returned by
    ``convert_dataset_feature_importance``.  The predictions are averaged over
    the rounds and scored with ``crps.norm_data``.

    Printed, in order: the rounded mean score of the feature-importance
    dataset, the rounded mean score of the regular dataset, the unrounded mean
    score of the feature-importance dataset, the relative change between the
    two in percent, and the elapsed wall-clock time.
    """
    start = datetime.now()

    # get the data
    test_data = helpers.load_data(numpy_path, 'test_set.npy')
    # Column 2 holds the label entries; only the first element of each is used.
    test_data_labels = np.array([item[0] for item in test_data[:, 2]])

    # convert the data
    test_dataset = convert_dataset(test_data, batchsize=1000)

    # checkpoint dir
    checkpoint_dir = os.path.join(logdir, expname, 'checkpoints/')

    print('[INFO] Starting feature importance')

    predictions = []
    predictions_feature = []
    test_dataset_feature = convert_dataset_feature_importance(test_data)

    for i in range(1, 11):
        model = modelprovider.build_multi_input_model((15, ), (2, 19))
        model.compile(loss=loss.crps_cost_function, optimizer=Adam())

        # Weights only are restored; expect_partial() silences warnings about
        # checkpoint values that are not consumed here.
        model.load_weights(
            os.path.join(checkpoint_dir, 'round-' + str(i) +
                         '/best_checkpoint')).expect_partial()

        predictions.append(
            model.predict(test_dataset, batch_size=1000, verbose=0))
        predictions_feature.append(
            model.predict(test_dataset_feature, batch_size=1000, verbose=0))

    predictions = np.array(predictions)
    predictions_feature = np.array(predictions_feature)

    # Make sure std is positive
    predictions[:, :, 1] = np.abs(predictions[:, :, 1])
    predictions_feature[:, :, 1] = np.abs(predictions_feature[:, :, 1])

    # Average over the rounds (axis 0).
    mean_predictions = np.mean(predictions, 0)
    mean_predictions_feature = np.mean(predictions_feature, 0)

    test_crps = crps.norm_data(test_data_labels, mean_predictions)
    test_crps_feature = crps.norm_data(test_data_labels,
                                       mean_predictions_feature)
    print(round(test_crps_feature.mean(), 2))
    print(round(test_crps.mean(), 2))
    print(test_crps_feature.mean())

    # Relative change of the mean score caused by the feature-importance
    # dataset, in percent of the regular mean score.
    test_score = round((1 - test_crps_feature.mean() / test_crps.mean()) * 100,
                       2)
    print(test_score)

    print(datetime.now() - start)
Ejemplo n.º 2
0
def printIntMonth(test_data_labels, test_data_month, mean_predictions):
    """Print the mean per-record score for each month number 1 through 12.

    The per-record scores come from ``crps.norm_data``.  For every month one
    ``(month, score)`` tuple is printed; the score is the mean of the matching
    records rounded to two decimals, or ``0`` when no record matches.
    """
    scores = crps.norm_data(test_data_labels, mean_predictions)
    for month in range(1, 13):
        # Keep only the scores of records whose month equals the current one.
        month_scores = scores[test_data_month == month]
        if len(month_scores) == 0:
            print((month, 0))
            continue
        print((month, round(np.array(month_scores).mean(), 2)))
Ejemplo n.º 3
0
def printIntCountries(test_data_labels, test_data_countries, mean_predictions):
    """Print the overall mean score followed by the means of five country ids.

    The per-record scores come from ``crps.norm_data``.  The output is a single
    line: the overall mean rounded to two decimals, then for each of the ids
    8, 16, 2, 5 and 20 an ``&`` followed by that country's mean with two
    decimals (``0.00`` when no record matches the id).
    """
    scores = crps.norm_data(test_data_labels, mean_predictions)
    cells = [str(round(scores.mean(), 2))]
    for country_id in (8, 16, 2, 5, 20):
        # Keep only the scores of records belonging to this country id.
        country_scores = scores[test_data_countries == country_id]
        country_mean = (round(np.array(country_scores).mean(), 2)
                        if len(country_scores) > 0 else 0)
        cells.append('&{:.2f}'.format(country_mean))
    print(''.join(cells))
Ejemplo n.º 4
0
def main():
    """Train (optionally) and evaluate ten independently initialised models.

    For every round 1..10 a new model is built and compiled with its own
    optimizer.  When ``train_model`` is set the model is fitted, writing one
    checkpoint per epoch plus the best checkpoint (lowest ``val_loss``) to
    ``<logdir>/<expname>/checkpoints/round-<i>/``.  The best checkpoint of the
    round is then loaded and used to predict the test set.

    The ten predictions are stacked, the std column is forced to be
    non-negative, and the mean over the rounds is handed to
    ``helpers.printIntCountries`` and ``helpers.printHist``.  The stacked
    predictions are saved as ``prediction`` in ``<logdir>/<expname>`` and the
    elapsed wall-clock time is printed.
    """
    start = datetime.now()

    # get the data
    train_data = helpers.load_data(numpy_path, 'train_set.npy')
    valid_data = helpers.load_data(numpy_path, 'valid_set.npy')
    test_data = helpers.load_data(numpy_path, 'test_set.npy')

    # filter the data: only the first element of each label / country entry
    # is used
    test_data_labels = np.array([item[0] for item in test_data[:, 2]])
    test_data_countries = np.array([item[0] for item in test_data[:, 0]])

    # convert the data
    train_dataset, train_shape = convert_dataset(train_data,
                                                 batchsize=batchsize,
                                                 shuffle=1000,
                                                 shape=True)
    valid_dataset = convert_dataset(valid_data, batchsize=1000, shuffle=100)
    test_dataset = convert_dataset(test_data, batchsize=1000)

    lossfn = loss.crps_cost_function

    # checkdir path
    checkpoint_dir = os.path.join(logdir, expname, 'checkpoints/')

    # begin with training 10 times
    print('[INFO] Starting training')
    predictions = []
    for i in range(1, 11):
        print('Round number: ' + str(i))

        # New model with new initial weights.  The optimizer is created per
        # round as well: an optimizer instance keeps per-variable state and an
        # iteration counter, so sharing one across rounds would leak state
        # from the previous model into the next one.
        model = build_model(train_shape[1], train_shape[2])
        model.compile(loss=lossfn,
                      optimizer=Adam(lr=learning_rate, amsgrad=True))

        best_checkpoint = os.path.join(checkpoint_dir,
                                       'round-' + str(i) + '/checkpoint')

        # checkpoint callbacks
        # all checkpoints (one per epoch)
        cp_callback_versuch = tf.keras.callbacks.ModelCheckpoint(
            os.path.join(checkpoint_dir, 'round-' + str(i) + '/') +
            "checkpoint_{epoch}",
            monitor='val_loss',
            save_weights_only=True,
            mode='min',
            verbose=0)
        # best checkpoint (lowest val_loss)
        cp_callback = tf.keras.callbacks.ModelCheckpoint(
            best_checkpoint,
            monitor='val_loss',
            save_weights_only=True,
            mode='min',
            save_best_only=True,
            verbose=0)

        # train the model
        if train_model:
            model.fit(
                train_dataset,
                epochs=epochs,
                initial_epoch=initial_epochs,
                batch_size=batchsize,
                verbose=1,
                validation_data=valid_dataset,
                validation_batch_size=1000,
                callbacks=[cp_callback, cp_callback_versuch],
            )

        # load the best checkpoint of round i; expect_partial() silences
        # warnings about checkpoint values that are not consumed here
        model.load_weights(best_checkpoint).expect_partial()

        predictions.append(
            model.predict(test_dataset, batch_size=1000, verbose=0))

    # convert to numpy array
    predictions = np.array(predictions)
    # Make sure std is positive
    predictions[:, :, 1] = np.abs(predictions[:, :, 1])
    # calculate mean between the 10 results
    mean_predictions = np.mean(predictions, 0)

    # print the results with filters
    helpers.printIntCountries(test_data_labels, test_data_countries,
                              mean_predictions)
    helpers.printHist(helpers.datasetPIT(mean_predictions, test_data_labels))

    np.save(os.path.join(logdir, expname, 'prediction'), predictions)
    print(datetime.now() - start)