Ejemplo n.º 1
0
def test_preprocess():
    """Check the output shapes of ``train.preprocess`` for a single sample.

    One random 28x28 array with the label ``2`` is passed as both the
    training and the test split.  The processed labels are expected to have
    shape ``(1, 10)`` and the processed inputs shape ``(1, 28, 28, 1)``.
    """
    images = np.random.rand(1, 28, 28)
    labels = np.array([2])

    processed = train.preprocess(images, labels, images, labels)
    x_train_pp, y_train_pp, x_test_pp, y_test_pp = processed

    # Labels first, then inputs; train before test within each group.
    for labels_pp in (y_train_pp, y_test_pp):
        assert labels_pp.shape == (1, 10)
    for images_pp in (x_train_pp, x_test_pp):
        assert images_pp.shape == (1, 28, 28, 1)
Ejemplo n.º 2
0
def run_test():
    """Predict test masks with test-time augmentation and save the averages.

    The test images are normalised with the statistics returned by
    ``Learner.load_meanstd()`` and the weights stored at
    ``Learner.best_weight_path`` are loaded.  The model is then run on the
    untouched images plus one augmented copy per entry of ``transforms``.
    For every image, the first model output (after the matching ``'undo'``)
    and the second model output are averaged over all passes and written
    with ``np.save`` to ``Learner.test_mask_res`` and
    ``Learner.test_mask_exist_res`` respectively.
    """
    batch_size = 128
    print('Loading and preprocessing test data...')
    mean, std = Learner.load_meanstd()

    imgs_test = preprocess(load_test_data()).astype('float32')
    imgs_test -= mean
    imgs_test /= std

    print('Loading saved weights...')
    model = get_unet(Adam(0.001))
    print ('Loading weights from %s' % Learner.best_weight_path)
    model.load_weights(Learner.best_weight_path)

    print ('Augment')
    n_aug = len(transforms)
    n_imgs = len(imgs_test)
    sample_shape = (1, img_rows, img_cols)
    augmented = np.empty((n_aug, n_imgs) + sample_shape, dtype=np.float32)
    for img_idx in range(n_imgs):
        for aug_idx, transform in enumerate(transforms):
            # Each transform gets its own copy of the image.
            augmented[aug_idx, img_idx] = transform['do'](imgs_test[img_idx].copy())

    print('Predicting masks on test data...')
    # Pass 0 is the un-augmented input; pass k (k >= 1) used transforms[k - 1].
    outputs = [model.predict(imgs_test, batch_size=batch_size, verbose=1)]
    for aug_idx in range(n_aug):
        outputs.append(
            model.predict(augmented[aug_idx], batch_size=batch_size, verbose=1))

    print('Analyzing')
    n_passes = n_aug + 1
    test_masks = np.empty((n_imgs,) + sample_shape, dtype=np.float32)
    test_probs = np.empty((n_imgs,), dtype=np.float32)
    for img_idx in range(n_imgs):
        pass_masks = np.empty((n_passes,) + sample_shape, dtype=np.float32)
        pass_probs = np.empty((n_passes,), dtype=np.float32)
        for pass_idx, result in enumerate(outputs):
            mask, prob = result[0][img_idx], result[1][img_idx]
            if pass_idx != 0:
                # Map the prediction back into the original image frame.
                mask = transforms[pass_idx - 1]['undo'](mask)
            pass_masks[pass_idx] = mask
            pass_probs[pass_idx] = prob

        test_masks[img_idx] = pass_masks.mean(axis=0)
        test_probs[img_idx] = pass_probs.mean()

    print('Saving ')
    np.save(Learner.test_mask_res, test_masks)
    np.save(Learner.test_mask_exist_res, test_probs)
def generate_submission():
    """Run the saved network on the test images and write a submission CSV.

    Test images come from ``DataManager.load_test_data()`` and are passed
    one by one through ``preprocess``.  The weights in
    ``./results/net.hdf5`` are loaded into ``build_model()``, and the model's
    two outputs are used as masks and as a per-image presence score.  Masks
    whose score is below 0.5 are zeroed before post-processing and
    run-length encoding.  The result is written to
    ``results/submission_<timestamp>.csv`` with the header ``img,pixels``
    and 1-based image ids.
    """
    print('Loading and processing test images')
    raw_test = DataManager.load_test_data()
    total = raw_test.shape[0]
    # NOTE(review): IMG_TARGET_ROWS is used for both spatial dimensions;
    # confirm the target images are square.
    batch_shape = (total, 1,
                   DataManager.IMG_TARGET_ROWS, DataManager.IMG_TARGET_ROWS)
    imgs = np.ndarray(batch_shape, dtype=np.uint8)
    for idx, img in enumerate(raw_test):
        imgs[idx] = preprocess(img)

    print('Loading network')
    model = build_model()
    model.load_weights('./results/net.hdf5')

    print('Generating predictions')
    masks, has_masks = model.predict(imgs, verbose=1)

    rows = []  # (1-based image id, run-length encoding) pairs
    for idx in range(total):
        # A presence score under 0.5 means "no mask": blank the prediction.
        if has_masks[idx, 0] < 0.5:
            masks[idx, 0] *= 0.

        rle = run_length_enc(post_process_mask(masks[idx, 0]))
        rows.append((idx + 1, rle))

        if idx % 100 == 0:
            print('{}/{}'.format(idx, total))

    first_row = 'img,pixels'
    # NOTE(review): str(datetime.now()) contains spaces and colons, which
    # some filesystems do not accept in file names.
    file_name = 'results/submission_{}.csv'.format(str(datetime.now()))

    with open(file_name, 'w+') as f:
        f.write(first_row + '\n')
        for img_id, rle in rows:
            f.write(str(img_id) + ',' + rle + '\n')
def run_test():
    """Predict test masks with a 6-fold ensemble and test-time augmentation.

    The test images are normalised with the statistics returned by
    ``Learner.load_meanstd()``.  For each fold ``k`` in ``0..5`` the weights
    in ``Learner.best_weight_path + '_<k>.fold'`` are loaded and the model is
    run on the untouched images plus one augmented copy per entry of
    ``transforms``.  Per fold, the first model output (after the matching
    ``'undo'``) and the second model output are averaged over all passes.
    The per-fold averages are then averaged across folds and written with
    ``np.save`` to ``Learner.test_mask_res`` and
    ``Learner.test_mask_exist_res``.

    Only ``print(...)`` calls and ``range`` are used so that the function
    runs on both Python 2 and Python 3.
    """
    BS = 256
    print('Loading and preprocessing test data...')
    mean, std = Learner.load_meanstd()

    imgs_test = load_test_data()
    imgs_test = preprocess(imgs_test)

    imgs_test = imgs_test.astype('float32')
    imgs_test -= mean
    imgs_test /= std

    print('Augment')
    alen, dlen = len(transforms), len(imgs_test)
    test_x = np.ndarray((alen, dlen, 1, img_rows, img_cols), dtype=np.float32)
    for i in range(dlen):
        for j, transform in enumerate(transforms):
            # Each transform gets its own copy of the image.
            test_x[j, i] = transform['do'](imgs_test[i].copy())

    kfold = 6
    kfold_masks, kfold_prob = [], []
    for _iter in range(kfold):
        print('Iter=%d, Loading saved weights...' % _iter)
        model = get_unet(Adam(0.001))
        filepath = Learner.best_weight_path + '_%d.fold' % _iter
        print('Loading weights from %s' % filepath)
        model.load_weights(filepath)

        print('Predicting masks on test data...')
        # outputs[0] is the un-augmented pass; outputs[j] (j >= 1) used
        # transforms[j - 1].
        outputs = [model.predict(imgs_test, batch_size=BS, verbose=1)]
        for j in range(alen):
            outputs.append(model.predict(test_x[j], batch_size=BS, verbose=1))

        print('Analyzing')
        test_masks = np.ndarray((dlen, 1, img_rows, img_cols),
                                dtype=np.float32)
        test_probs = np.ndarray((dlen, ), dtype=np.float32)
        for i in range(dlen):
            masks = np.ndarray((alen + 1, 1, img_rows, img_cols),
                               dtype=np.float32)
            probs = np.ndarray((alen + 1, ), dtype=np.float32)
            for j, t_y in enumerate(outputs):
                mask, prob = t_y[0][i], t_y[1][i]
                if j:
                    # Map the prediction back into the original image frame.
                    mask = transforms[j - 1]['undo'](mask.copy())
                masks[j] = mask
                probs[j] = prob

            test_masks[i] = np.mean(masks, 0)
            test_probs[i] = np.mean(probs)
        kfold_masks.append(test_masks)
        kfold_prob.append(test_probs)

    print('Summing results of ensemble')

    # Average the per-fold results image by image.
    res_masks = np.ndarray((dlen, 1, img_rows, img_cols), dtype=np.float32)
    res_probs = np.ndarray((dlen, ), dtype=np.float32)
    for i in range(dlen):
        masks = np.ndarray((kfold, 1, img_rows, img_cols), dtype=np.float32)
        probs = np.ndarray((kfold, ), dtype=np.float32)
        for k in range(kfold):
            masks[k] = kfold_masks[k][i]
            probs[k] = kfold_prob[k][i]
        res_masks[i] = np.mean(masks, 0)
        res_probs[i] = np.mean(probs)

    print('Saving ')
    np.save(Learner.test_mask_res, res_masks)
    np.save(Learner.test_mask_exist_res, res_probs)
Ejemplo n.º 5
0
"""
print timeit.timeit(code, number=100000)
'''
#def training_testing(inputlines,type):
# type 1 is splitting the data into training and testing.
# Read the labelled corpus; the context manager closes the handle once the
# lines are in memory.
with open("./train.csv","r") as fopen:
    datalines = fopen.readlines()
datalines = datalines[1:]  # skip the first line
shuffle(datalines)

# 80/20 split: the first 80% of the shuffled lines are used for training.
percentage = int(0.8*len(datalines))
training_lines = datalines[:percentage]
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("training lines:  %d" % len(training_lines))

[worddict,gtruth,idf_dict ] = train.preprocess(training_lines)
train.multinomial_training(worddict,gtruth,idf_dict,training_lines,"test_model_param.p")


# Split each held-out line into its label and its text.  The slice starts at
# `percentage` (not `percentage + 1`) so the line at the boundary is no
# longer left out of both sets.
# assumes the label is the first character and the text starts at column 2
# -- TODO confirm against the train.csv layout.
held_out_lines = datalines[percentage:]
gold_data = [int(line[0]) for line in held_out_lines]
testing_lines = [line[2:] for line in held_out_lines]
print("testing lines:  %d" % len(testing_lines))

test.multinomial_testing("test_model_param.p",testing_lines,"test_results.txt")


# Read the predictions back, skipping the first line of the results file.
with open("test_results.txt","r") as results_file:
    lista = results_file.readlines()
listb = [int(element.strip().split(",")[1]) for element in lista[1:]]
# list(...) keeps `comparision` a list on Python 3, where map() is lazy.
comparision  = list(map(operator.sub, gold_data, listb))
print("If score is 0 its perfect return else score is error value")
Ejemplo n.º 6
0
import numpy as np
from predict import load_test_data
from train import preprocess


# Folder holding the saved predictions; the ROC figure is written there too.
path = '/Users/xuchenyang/Documents/third_exp/file/segnet-lr-3-32-100/'
predicted_masks = np.load(path + 'predict.npy')

# Only the ground-truth masks are used below; the test images are not.
imgs_test, imgs_test_mask = load_test_data()
imgs_test_gt = preprocess(imgs_test_mask)

# Flatten both arrays so that every element is one sample for roc_curve.
predicted_masks_flat = predicted_masks.flatten()
test_gt_masks_flat = imgs_test_gt.flatten()

from sklearn import metrics
fpr, tpr, thresholds = metrics.roc_curve(
    test_gt_masks_flat,
    predicted_masks_flat,
    pos_label=255,
)

import matplotlib.pyplot as plt
# Dashed diagonal as a reference line, then the ROC curve itself.
plt.plot([0, 1], [0, 1], 'k--')
# NOTE(review): the AUC in the label is hard-coded, not computed from
# fpr/tpr -- confirm it matches the data being plotted.
line1 = plt.plot(fpr, tpr, 'b', label="U-NET ROC (AUC = 0.86)")[0]
plt.legend(handles=[line1], loc=4, prop={'size': 12})

plt.xlabel("False Positive Rate (1-Specificity)")
plt.ylabel("True Positive Rate (Sensitivity)")
plt.xlim(0, 1.0)
plt.ylim(0, 1.0)
plt.grid()
plt.savefig(path + 'roc')
Ejemplo n.º 7
0
import pandas as pd
from train import preprocess
from keras.models import load_model
from math import isnan

# Keep the passenger ids before the frame is handed to preprocess().
dataframe = pd.read_csv("data/test.csv")
p_ids = dataframe["PassengerId"]

data = preprocess(dataframe)
model = load_model('titanic.h5')
prediction = model.predict(data)

# One "<PassengerId>,<0 or 1>" row per passenger; the first value of each
# prediction is rounded to the nearest integer.
with open('result.csv', 'w') as out_file:
    out_file.write("PassengerId,Survived\n")
    out_file.writelines(
        str(p_id) + "," + str(int(round(pred[0]))) + "\n"
        for p_id, pred in zip(p_ids, prediction)
    )
Ejemplo n.º 8
0
                run_id = version.run_id
    model = mlflow.sklearn.load_model(f"models:/{model_name}/{model_stage}")

    # Load data
    data_path = Path(__file__).parents[0].resolve() / "data"
    train_df = pd.read_csv(data_path / "train.csv")
    test_df = pd.read_csv(data_path / "test.csv")

    # preprocessing
    impute_strats = {
        "Age": round(train_df["Age"].mean()),
        "Embarked": train_df["Embarked"].mode()[0],
        "Cabin": "Unknown",
        "Fare": round(train_df["Fare"].mean()),
    }
    test_df = preprocess(test_df, feature_labels, impute_strats)
    test_df["Survived"] = model.predict(test_df[feature_labels])
    test_df[["PassengerId", "Survived"]].to_csv(data_path / "submission.csv",
                                                index=False)

    # submit to kaggle
    kaggle.api.competition_submit(
        file_name=str(data_path / "submission.csv"),
        message="Testing submission api",
        competition="titanic",
    )
    time.sleep(30)
    test_accuracy = kaggle.api.process_response(
        kaggle.api.competitions_submissions_list_with_http_info(
            "titanic"))[0]["publicScore"]
    mlflow_client.log_metric(run_id, "test_accuracy", float(test_accuracy))
Ejemplo n.º 9
0
 def test_preprocess(self):
     """Call ``preprocess()`` and print the sizes of the two x splits."""
     x_train, y_train, x_dev, y_dev = preprocess()
     # The label names exactly the two values that are formatted; it used to
     # list all four splits while printing only two lengths.
     print("x_train, x_dev: {0},{1}".format(len(x_train), len(x_dev)))
def run_test():
    """Predict test masks with test-time augmentation and save them with the ground truth.

    The test images are normalised with the statistics returned by
    ``Learner.load_meanstd()`` and the ground-truth masks are divided by
    255.  The weights stored at ``Learner.best_weight_path`` are loaded and
    the model is run on the untouched images plus one augmented copy per
    entry of ``transforms``.  For every image, the first model output (after
    the matching ``'undo'``) and the second model output are averaged over
    all passes.  The averages and the scaled ground truth are written with
    ``np.save`` to ``Learner.test_mask_res``,
    ``Learner.test_mask_exist_res`` and ``Learner.test_mask_gt``.
    """
    batch_size = 128
    print('Loading and preprocessing test data...')
    mean, std = Learner.load_meanstd()

    imgs_test, img_test_mask_gt = load_test_data()
    # Only referenced by the disabled PNG export further down.
    test_img_id = load_test_ids()

    imgs_test = preprocess(imgs_test).astype('float32')
    img_test_mask_gt = preprocess(img_test_mask_gt).astype('float32')

    imgs_test -= mean
    imgs_test /= std

    # mask_array = np.array(mask_array, dtype=np.float32)
    img_test_mask_gt /= 255.0

    print('Loading saved weights...')
    model = get_unet(Adam(0.001))
    print ('Loading weights from %s' % Learner.best_weight_path)
    model.load_weights(Learner.best_weight_path)

    print ('Augment')
    n_aug = len(transforms)
    n_imgs = len(imgs_test)
    sample_shape = (1, img_rows, img_cols)
    augmented = np.empty((n_aug, n_imgs) + sample_shape, dtype=np.float32)
    for img_idx in range(n_imgs):
        for aug_idx, transform in enumerate(transforms):
            # Each transform gets its own copy of the image.
            augmented[aug_idx, img_idx] = transform['do'](imgs_test[img_idx].copy())

    print('Predicting masks on test data...')
    # Pass 0 is the un-augmented input; pass k (k >= 1) used transforms[k - 1].
    outputs = [model.predict(imgs_test, batch_size=batch_size, verbose=1)]
    for aug_idx in range(n_aug):
        outputs.append(
            model.predict(augmented[aug_idx], batch_size=batch_size, verbose=1))

    print('Analyzing')
    n_passes = n_aug + 1
    test_masks = np.empty((n_imgs,) + sample_shape, dtype=np.float32)
    test_probs = np.empty((n_imgs,), dtype=np.float32)
    for img_idx in range(n_imgs):
        pass_masks = np.empty((n_passes,) + sample_shape, dtype=np.float32)
        pass_probs = np.empty((n_passes,), dtype=np.float32)
        for pass_idx, result in enumerate(outputs):
            mask, prob = result[0][img_idx], result[1][img_idx]
            if pass_idx != 0:
                # Map the prediction back into the original image frame.
                mask = transforms[pass_idx - 1]['undo'](mask)
            pass_masks[pass_idx] = mask
            pass_probs[pass_idx] = prob

        test_masks[img_idx] = pass_masks.mean(axis=0)
        test_probs[img_idx] = pass_probs.mean()

    # test_eval = model.evaluate(imgs_test, img_test_mask_gt, batch_size=BS)
    print(img_test_mask_gt.shape)
    print(test_masks.shape)
    # Disabled export of every averaged mask as a PNG:
    # pred_dir = 'preds'
    # if not os.path.exists(pred_dir):
    #     os.mkdir(pred_dir)
    # for image, image_id in zip(test_masks, test_img_id):
    #     # image = (image[:, :, 0] * 255.).astype(np.uint8)
    #     image = (image[0, :, :] * 255.)
    #     print(image)
    #     print(image.shape)
    #     imsave(os.path.join(pred_dir, str(image_id) + '_pred.png'), image)
    print('Saving ')
    np.save(Learner.test_mask_res, test_masks)
    np.save(Learner.test_mask_exist_res, test_probs)
    np.save(Learner.test_mask_gt, img_test_mask_gt)
Ejemplo n.º 11
0
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 0
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],  # 8
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],  # 2
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],  # 4
    ]

    # test data
    test_digits_path = crop_image(
        os.path.join(os.path.curdir, 'captcha', 'test_set',
                     'captcha_test.jpg'), 'test')
    test_feature_set = []
    for digit_path in test_digits_path:
        test_feature_set.append(feature_extract(digit_path))
    test_label_set = [
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],  # 7
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],  # 3
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 0
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],  # 4
    ]

    # train & predict
    scaler = preprocess(train_feature_set)
    scaler.transform(train_feature_set)
    scaler.transform(test_feature_set)

    clf = train(train_feature_set, train_label_set)
    prediction = clf.predict(test_feature_set)

    print(prediction)
    print(np.argmax(prediction, axis=1))
Ejemplo n.º 12
0
                print("\nEvaluation:")
                dev_step(x_dev, y_dev, writer=dev_summary_writer)
                print("")

                path = saver.save(sess,
                                  checkpoint_prefix,
                                  global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))


def get_args(argv=None):
    """Parse the command-line switches of this script.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``.  The default ``None`` keeps the original
            behaviour of reading the process arguments.

    Returns:
        An ``argparse.Namespace`` with the boolean attributes ``prune``
        (set by ``--prune``) and ``makegraph`` (set by ``--make_graph``);
        both are ``False`` when the switch is absent.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--prune", dest="prune", action="store_true",
                        help="run the filter-pruning branch")
    parser.add_argument("--make_graph", dest="makegraph", action="store_true",
                        help="run the graph-building branch")
    return parser.parse_args(argv)


if __name__ == '__main__':
    # Script entry point: --prune takes precedence over --make_graph; with
    # neither switch nothing is done.
    args = get_args()

    if args.prune:
        meta_graph_path = "./runs/1557076524/checkpoints/model-21300.meta"
        checkpoint_dir = './runs/1557076524/checkpoints/'
        prune_filter_weight(meta_graph_path, checkpoint_dir, 128)
        # Alternative invocation kept for reference:
        #filters = prune_filter_weight("./pruned_by_l2_norm/1557071581/checkpoints/model-2600.meta", './pruned_by_l2_norm/1557071581/checkpoints',90)

    elif args.makegraph:
        # preprocess() must yield exactly seven values; the test split is
        # unpacked but not used by make_graph.
        (x_train, y_train, vocab_processor,
         x_dev, y_dev, x_test, y_test) = train.preprocess()
        make_graph(x_train, y_train, vocab_processor, x_dev, y_dev)
Ejemplo n.º 13
0
def predict():
    """Evaluate the saved U-Net on the test set and export the results.

    The weights in ``path + 'unet.hdf5'`` are loaded, the model is run and
    evaluated on the normalised test images, and the raw predictions and the
    evaluation result are written to ``path + 'predict.npy'`` and
    ``path + 'res_loss.txt'``.  Afterwards every test image, its ground-truth
    mask and its predicted mask (scaled by 255) are saved as 96x96 PNG files
    under ``path + "UNET_PREDICTIONS/"``.

    Mean and standard deviation are computed from the raw test images
    themselves, before ``preprocess`` is applied.
    """
    model = get_unet()

    path_to_save_results = path + "UNET_PREDICTIONS/"

    imgs_test, imgs_test_mask = load_test_data()

    mean = np.mean(imgs_test)
    std = np.std(imgs_test)

    imgs_test = preprocess(imgs_test)
    imgs_test_mask = preprocess(imgs_test_mask)

    imgs_test_source = imgs_test.astype('float32')
    imgs_test_source -= mean
    imgs_test_source /= std

    imgs_test_mask = imgs_test_mask.astype('float32')
    imgs_test_mask /= 255.  # scale masks to [0, 1]

    print('Loading saved weights...')
    print('-' * 30)
    model.load_weights(path + 'unet.hdf5')
    print('Predicting masks on test data...')
    print('-' * 30)
    imgs_mask_predict = model.predict(imgs_test_source, verbose=1)
    res = model.evaluate(imgs_test_source,
                         imgs_test_mask,
                         batch_size=32,
                         verbose=1)
    res_loss = np.array(res)
    np.save(path + 'predict.npy', imgs_mask_predict)
    np.savetxt(path + 'res_loss.txt', res_loss)

    # Work on a fresh copy read back from disk so that the in-place scaling
    # below does not touch `imgs_mask_predict`.
    predicted_masks = np.load(path + 'predict.npy')
    predicted_masks *= 255
    # Reload the untouched test data for the PNG export.
    imgs_test, imgs_test_mask = load_test_data()

    for i in range(imgs_test.shape[0]):
        img = resize(imgs_test[i], (96, 96), preserve_range=True)
        img_mask = resize(imgs_test_mask[i], (96, 96), preserve_range=True)
        im_test_source = Image.fromarray(img.astype(np.uint8))
        im_test_masks = Image.fromarray((img_mask.squeeze()).astype(np.uint8))
        im_test_predict = Image.fromarray(
            (predicted_masks[i].squeeze()).astype(np.uint8))
        # File names are 1-based.
        im_test_source_name = "Test_Image_" + str(i + 1) + ".png"
        im_test_predict_name = "Test_Image_" + str(i + 1) + "_Predict.png"
        im_test_gt_mask_name = "Test_Image_" + str(i + 1) + "_OriginalMask.png"
        im_test_source.save(
            os.path.join(path_to_save_results, im_test_source_name))
        im_test_predict.save(
            os.path.join(path_to_save_results, im_test_predict_name))
        im_test_masks.save(
            os.path.join(path_to_save_results, im_test_gt_mask_name))
    message = "Successfully Saved Results to " + path_to_save_results
    # print(...) instead of the Python 2 print statement: same output, valid
    # on Python 3, and consistent with the other calls in this function.
    print(message)
Ejemplo n.º 14
0
import argparse
import pickle

import numpy as np
import pandas as pd

from train import preprocess

# Pickled model that this script loads.
outfile = 'model.sav'

# Parsing script arguments
parser = argparse.ArgumentParser(description='Process input')
parser.add_argument('tsv_path', type=str, help='tsv file path')
args = parser.parse_args()

# Reading input TSV
data = pd.read_csv(args.tsv_path, sep="\t")
# Copy the ids before the frame is handed to preprocess().
ids = data['id'].copy()
X, y_true = preprocess(data, is_train=False)

#####
# Prediction
# The context manager closes the model file as soon as it has been read.
# NOTE: pickle.load executes code embedded in the file -- only load model
# files from a trusted source.
with open(outfile, 'rb') as model_file:
    model = pickle.load(model_file)
log_pred = model.predict(X)
# np.exp maps the model output back from log scale, as the name `log_pred`
# suggests -- presumably the model was fitted on log(revenue); verify
# against the training code.
y_pred = np.exp(log_pred)

prediction_df = pd.DataFrame(columns=['id', 'revenue'])
prediction_df['id'] = ids
prediction_df['revenue'] = y_pred
####

# Export the predictions as headerless "id,revenue" rows.
prediction_df.to_csv("prediction.csv", index=False, header=False)
Ejemplo n.º 15
0
import numpy as np
from train import get_unet, preprocess

from data import load_test_data, load_train_data

# The training images are only needed for their global mean / std, which
# are then used to normalise the test images.
imgs_train, imgs_mask_train = load_train_data()
imgs_train = preprocess(imgs_train).astype('float32')
mean, std = np.mean(imgs_train), np.std(imgs_train)

imgs_test, imgs_id_test = load_test_data()
imgs_test = preprocess(imgs_test).astype('float32')
imgs_test -= mean
imgs_test /= std

# Load the saved weights, predict the test masks and store them as .npy.
model = get_unet()
model.load_weights("final.h5")
imgs_mask_test = model.predict(imgs_test, verbose=1)

np.save('imgs_mask_test_final.npy', imgs_mask_test)
Ejemplo n.º 16
0
"""
print timeit.timeit(code, number=100000)
'''
#def training_testing(inputlines,type):
# type 1 is splitting the data into training and testing.
# Read the labelled corpus; the context manager closes the handle once the
# lines are in memory.
with open("./train.csv", "r") as fopen:
    datalines = fopen.readlines()
datalines = datalines[1:]  # skip the first line
shuffle(datalines)

# 80/20 split: the first 80% of the shuffled lines are used for training.
percentage = int(0.8 * len(datalines))
training_lines = datalines[:percentage]
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("training lines:  %d" % len(training_lines))

[worddict, gtruth, idf_dict] = train.preprocess(training_lines)
train.multinomial_training(worddict, gtruth, idf_dict, training_lines,
                           "test_model_param.p")

# Split each held-out line into its label and its text.  The slice starts at
# `percentage` (not `percentage + 1`) so the line at the boundary is no
# longer left out of both sets.
# assumes the label is the first character and the text starts at column 2
# -- TODO confirm against the train.csv layout.
held_out_lines = datalines[percentage:]
gold_data = [int(line[0]) for line in held_out_lines]
testing_lines = [line[2:] for line in held_out_lines]
print("testing lines:  %d" % len(testing_lines))

test.multinomial_testing("test_model_param.p", testing_lines,
                         "test_results.txt")

# Read the predictions back, skipping the first line of the results file.
with open("test_results.txt", "r") as results_file:
    lista = results_file.readlines()
listb = [int(element.strip().split(",")[1]) for element in lista[1:]]
# list(...) keeps `comparision` a list on Python 3, where map() is lazy.
comparision = list(map(operator.sub, gold_data, listb))
print("If score is 0 its perfect return else score is error value")