Example #1
#          snapshot_epoch=False, run_id='googlenet_cs249_aug')
#model.save('googlenet_aug')
model.load('googlenet_aug')
test_path = '/home/ubuntu/cs249_final_project/test'

import os

import h5py
from tflearn.data_utils import build_hdf5_image_dataset
with open('test.txt', 'w') as f:
    for filename in os.listdir(test_path):
        f.write(
            os.path.join(test_path, filename) + ' ' +
            os.path.splitext(filename)[0] + '\n')
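# Each line of test.txt is "<image path> <label>"; with categorical_labels=False
# the numeric file-name stem is stored verbatim as the label in Y.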

build_hdf5_image_dataset('test.txt',
                         image_shape=(300, 300),
                         mode='file',
                         categorical_labels=False,
                         output_path='testset.h5')
h5f = h5py.File('testset.h5', 'r')
X = h5f['X']
Y = h5f['Y']

predict = model.predict(X)

import csv
with open('submit.csv', 'w') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    writer.writerow(['image_name', 'Type_1', 'Type_2', 'Type_3'])
    for i in range(len(predict)):
        filename = str(int(Y[i])) + '.jpg'
        # predict[i] may be a numpy row; convert to a plain list before concatenating
        writer.writerow([filename] + list(predict[i]))
Example #2
from tflearn.data_utils import build_hdf5_image_dataset
import h5py

path = '/home/suger/workspace/pig-face-recognition/raw_data/txt/'
# filenum = 1
# filename = 'train_data'
# files = []
# result = []
# for i in range(0, filenum):
#     files.append(path + filename + str(i) + '.txt')
#     result.append(filename + str(i) + '.h5')
#     build_hdf5_image_dataset(files[i], image_shape=(448, 448), mode='file', output_path=result[i], categorical_labels=True, normalize=False)
#     print('Finish dataset ' + result[i])

filenum = 1
filename = 'validation_data'
files = []
result = []
for i in range(0, filenum):
    files.append(path + filename + str(i) + '.txt')
    result.append(filename + str(i) + '.h5')
    build_hdf5_image_dataset(files[i],
                             image_shape=(448, 448),
                             mode='file',
                             output_path=result[i],
                             categorical_labels=True,
                             normalize=False)
    print('Finish dataset ' + result[i])
Example #3
"""
@author: sanat
"""
from __future__ import division, print_function, absolute_import

import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.estimator import regression
from tflearn.data_utils import build_hdf5_image_dataset
import tflearn.datasets.oxflower17 as oxflower17
import h5py
import math
X, Y = oxflower17.load_data(one_hot=True, resize_pics=(227, 227))
"""train_dataset_file = '/home/sanat/Desktop/CDSAML/imagepaths(train).txt' 
val_dataset_file = '/home/sanat/Desktop/CDSAML/imagepaths(val).txt'
build_hdf5_image_dataset(train_dataset_file, image_shape=(128, 128), mode='file', output_path='dataset.h5', categorical_labels=True, normalize=True)

h5f = h5py.File('dataset.h5', 'r')
X = h5f['X']
Y = h5f['Y'] 

X_val, Y_val = image_preloader(val_dataset_file, image_shape=(227, 227),
                       mode='file', categorical_labels=True,
                       normalize=True)"""

# Building 'Alexnet 2.0'

network = input_data(shape=[None, 227, 227, 3])
"""layer 1:conv+max+norm"""
Example #4
"""

from __future__ import division, print_function, absolute_import

import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.estimator import regression

dataset_file = r'/home/ubuntu/cs249_final_project/image_files/train'

from tflearn.data_utils import build_hdf5_image_dataset
build_hdf5_image_dataset(dataset_file,
                         image_shape=(300, 300),
                         mode='folder',
                         output_path='dataset.h5',
                         categorical_labels=True,
                         normalize=True)

import h5py
h5f = h5py.File('dataset.h5', 'r')
X = h5f['X']
Y = h5f['Y']

net = tflearn.input_data(shape=[None, 300, 300, 3])
net = tflearn.conv_2d(net, 64, 3, activation='relu', bias=False)
# Residual blocks
net = tflearn.residual_bottleneck(net, 3, 16, 64)
net = tflearn.residual_bottleneck(net, 1, 32, 128, downsample=True)
net = tflearn.residual_bottleneck(net, 2, 32, 128)
net = tflearn.residual_bottleneck(net, 1, 64, 256, downsample=True)
Example #5
import pathlib

import h5py
from tflearn.data_utils import build_hdf5_image_dataset
from tqdm import tqdm


def get_label(windows_path):
    fileName = (str(windows_path)).split('\\')[-1]
    label = int(fileName[0:fileName.find('.')].split('_')[1]) - 1
    return label
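
# e.g. get_label(r'data\train\cat_3.jpg') -> 2 (hypothetical file name; the
# suffix after '_' is a 1-based class index)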


if __name__ == '__main__':
    for name in ['train', 'val']:
        print('{0} h5 file start to create'.format(name))
        directory = pathlib.Path('data/' + name)
        h5plan_name = pathlib.Path('h5plan.txt')
        output_filename = pathlib.Path(name + '_128.h5')
        with open(h5plan_name, "w") as text_file:
            for windows_path in tqdm(list(directory.glob('*.jpg'))):
                text_file.write('{0} {1}\n'.format(str(windows_path),
                                                   get_label(windows_path)))

        build_hdf5_image_dataset(h5plan_name,
                                 image_shape=(128, 128),
                                 mode='file',
                                 output_path=output_filename,
                                 categorical_labels=True,
                                 normalize=True)

        print('{0} h5 file created'.format(name))
        train_data = h5py.File(output_filename, 'r')
        X_train, Y_train = train_data['X'], train_data['Y']
        print(Y_train[:])
Example #6
##import tflearn.datasets.oxflower17 as oxflower17
##X, Y = oxflower17.load_data(one_hot=True, resize_pics=(227, 227))

import os

import tensorflow as tf
import tflearn
import h5py
from tflearn.data_utils import build_hdf5_image_dataset
from tflearn.layers.core import input_data
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization

##Building Dataset
root_folder = './cacoons/jpg/'
print('-----------Start making dataset!----------')
if not os.path.exists('cacoon.h5'):
    build_hdf5_image_dataset(root_folder,
                             image_shape=(227, 227),
                             mode='folder',
                             output_path='cacoon.h5',
                             categorical_labels=True,
                             normalize=False)
print('------------Dataset is prepared!-------------- ')
h5f = h5py.File('cacoon.h5', 'r')
X = h5f['X']
Y = h5f['Y']

# Building 'AlexNet'
network = input_data(shape=[None, 227, 227, 3])
network = conv_2d(network, 48, 11, strides=4, activation='relu')
network = max_pool_2d(network, 3, strides=2)
network = local_response_normalization(network)
network = conv_2d(network, 128, 5, activation='relu')
network = max_pool_2d(network, 3, strides=2)
network = local_response_normalization(network)
Example #7
def data_prep0(conf, method='random', training_size=0.7):
    '''
    create hdf5 files based on train and test selecting strategy
    and return results as train and test sets
    it augments train data
    :param conf: parameters from configure file
    :param method: random / sequence; with sequence, the first training_size fraction of images is used for training
    :param training_size: portion of images that will be used in training
    :return: X_train, Y_train, X_test, Y_test
    '''

    data_folder = conf['data_folder']

    test_counter = 0
    counter = 0
    train_file = data_folder + 'train.txt'
    test_file = data_folder + 'test.txt'
    with open(data_folder + 'labeling.csv') as inputFile:
        df = pd.read_csv(data_folder + 'labeling.csv')
        with open(train_file, 'w+') as trainFile:
            with open(test_file, 'w+') as testFile:
                for row in df.iterrows():
                    row = row[1]
                    counter += 1
                    test_counter += 1
                    line = data_folder + 'ercis/' + str(int(row['index'] - 1)).zfill(5) + '.tif' \
                           + ' ' + str(int(row['manual_label'])) + '\n'
                    if method == 'random':
                        if random.random() < training_size:
                            trainFile.write(line)
                        else:
                            testFile.write(line)
                            trainFile.write(line)
                            test_counter += 1
                    else:
                        if counter < 37204 * training_size:  # random.random() < train_size:
                            trainFile.write(line)
                        else:
                            testFile.write(line)
                            trainFile.write(line)
                            test_counter += 1

    # Build a HDF5 dataset (only required once)
    build_hdf5_image_dataset(train_file,
                             image_shape=(40, 40),
                             mode='file',
                             output_path=data_folder + 'train.h5',
                             categorical_labels=True,
                             normalize=True)
    build_hdf5_image_dataset(test_file,
                             image_shape=(40, 40),
                             mode='file',
                             output_path=data_folder + 'test.h5',
                             categorical_labels=True,
                             normalize=True)
    # Load HDF5 dataset
    # Open in read mode ('w' would truncate the files that were just built)
    h5f_train = h5py.File(data_folder + 'train.h5', 'r')
    X_train = h5f_train['X']
    Y_train = h5f_train['Y']

    h5f_test = h5py.File(data_folder + 'test.h5', 'r')
    X_test = h5f_test['X']
    Y_test = h5f_test['Y']

    return X_train, Y_train, X_test, Y_test
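
# A minimal usage sketch for data_prep0 (hypothetical conf; 'data_folder' is
# the only key the function reads):
if __name__ == '__main__':
    conf = {'data_folder': '/path/to/data/'}
    X_train, Y_train, X_test, Y_test = data_prep0(conf, method='random',
                                                  training_size=0.7)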
Example #8
from tflearn.data_utils import build_hdf5_image_dataset
import h5py

new_train = "/home/adoke/tf_tutorial/aircrafts_new/new_train_val/new_train.txt"
new_val = "/home/adoke/tf_tutorial/aircrafts_new/new_train_val/new_val.txt"
new_test = "/home/adoke/tf_tutorial/aircrafts_new/from_start/a3_variants_test.txt"

# image_shape option can be set to different values to create images of different sizes
build_hdf5_image_dataset(new_val,
                         image_shape=(224, 224),
                         mode='file',
                         output_path='new_val_224.h5',
                         categorical_labels=True,
                         normalize=False)
print('Done creating new_val.h5')
build_hdf5_image_dataset(new_test,
                         image_shape=(224, 224),
                         mode='file',
                         output_path='new_test_224.h5',
                         categorical_labels=True,
                         normalize=False)
print('Done creating new_test.h5')
build_hdf5_image_dataset(new_train,
                         image_shape=(488, 488),
                         mode='file',
                         output_path='new_train_488.h5',
                         categorical_labels=True,
                         normalize=False)
print('Done creating new_train_488.h5')
Example #9
import sys
import csv
import numpy as np
import matplotlib.pyplot as plt
import tflearn
import tensorflow as tf
tf.reset_default_graph()
from tflearn.data_utils import load_csv

csv.field_size_limit(sys.maxsize)

# Build a HDF5 dataset (only required once)
from tflearn.data_utils import build_hdf5_image_dataset

# Placeholder: point this at your own "path label" list file
dataset_file = 'your_dataset_list.txt'
build_hdf5_image_dataset(dataset_file,
                         image_shape=(250, 250),
                         mode='file',
                         output_path='your_data_path.h5',
                         categorical_labels=False,
                         normalize=True)

# Load HDF5 dataset
import h5py
h5f = h5py.File('your_data_path.h5', 'r')
X = h5f['X']
Y = h5f['Y']

network = tflearn.input_data(shape=[None, 250, 250, 3])
network = tflearn.conv_2d(network,
                          64,
                          10,
                          4,
                          activation='relu',
Example #10
import os
from PIL import Image
from tflearn.data_utils import build_hdf5_image_dataset

image_shape = (32, 32)
dataset_file = "classes.txt"
output_path = "dataset.h5"

for file_name in os.listdir("."):
    if os.path.isdir(file_name):
        for image_name in os.listdir(file_name):
            image_path = "{}/{}".format(file_name, image_name)
            image = Image.open(image_path)
            image = image.resize(image_shape)
            image.save(image_path)
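
# NOTE: the loop above overwrites the originals with resized copies;
# build_hdf5_image_dataset would also resize on the fly via image_shape.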

build_hdf5_image_dataset(dataset_file,
                         image_shape=image_shape,
                         output_path=output_path,
                         mode="file",
                         categorical_labels=True,
                         normalize=True,
                         grayscale=False)
Example #11
import tensorflow as tf
import tflearn
import os
from tflearn.data_preprocessing import ImagePreprocessing
from tflearn.data_utils import build_hdf5_image_dataset
import h5py
data_dir = 'TU-Berlin'

build_hdf5_image_dataset(data_dir,
                         image_shape=(224, 224),
                         mode='folder',
                         output_path='dataset.h5',
                         categorical_labels=True,
                         normalize=True)

h5f = h5py.File('dataset.h5', 'r')
X = h5f['X']
Y = h5f['Y']

print(X.shape)
Example #12
# new_train = "/home/adoke/tf_tutorial/aircrafts_new/new_train_val/new_train.txt"
# new_val = "/home/adoke/tf_tutorial/aircrafts_new/new_train_val/new_val.txt"
# new_test = "/home/adoke/tf_tutorial/aircrafts_new/from_start/a3_variants_test.txt"
#
# # image_shape option can be set to different values to create images of different sizes
# build_hdf5_image_dataset(new_val, image_shape=(224, 224), mode='file', output_path='new_val_224.h5', categorical_labels=True, normalize=False)
# print ('Done creating new_val.h5')
# build_hdf5_image_dataset(new_test, image_shape=(224, 224), mode='file', output_path='new_test_224.h5', categorical_labels=True, normalize=False)
# print ('Done creating new_test.h5')
# build_hdf5_image_dataset(new_train, image_shape=(488, 488), mode='file', output_path='new_train_488.h5', categorical_labels=True, normalize=False)
# print ('Done creating new_train_488.h5')


from tflearn.data_utils import build_hdf5_image_dataset

trainfile = "../../data/train_data/"
validfile = "../../data/valid_data/"
totaltrain = "../../data/train/"

build_hdf5_image_dataset(trainfile, image_shape=(448, 448),
                         mode='folder', output_path='../data/train.h5',
                         categorical_labels=True, normalize=False, files_extension=['.JPEG'])
print('Done creating train.h5')
build_hdf5_image_dataset(validfile, image_shape=(224, 224),
                         mode='folder', output_path='../data/valid.h5',
                         categorical_labels=True, normalize=False, files_extension=['.jpg'])
print('Done creating valid.h5')
# build_hdf5_image_dataset(trainfile, image_shape=(448, 448),
#                          mode='folder', output_path='../data/train.h5',
#                          categorical_labels=True, normalize=False, files_extension=['.JPEG'])
Example #13
from tflearn.data_utils import build_hdf5_image_dataset
import h5py

#new_train = "/home/adoke/tf_tutorial/aircrafts_new/new_train_val/new_train.txt"
#new_val = "/home/adoke/tf_tutorial/aircrafts_new/new_train_val/new_val.txt"
#new_test = "/home/adoke/tf_tutorial/aircrafts_new/from_start/a3_variants_test.txt"
new_train = "/media/goerlab/My Passport/Welder_detection/dataset/20180209/h5_middle_crop/train2.txt"
new_val = "/media/goerlab/My Passport/Welder_detection/dataset/20180209/h5_middle_crop/val.txt"
#new_test="/home/goerlab/Bilinear-CNN-TensorFlow/train_test_small/images_test.txt"

out_dir = "/media/goerlab/My Passport/Welder_detection/dataset/20180209/h5_middle_crop/"  # renamed from 'dir' to avoid shadowing the builtin

#image_shape option can be set to different values to create images of different sizes
# build_hdf5_image_dataset(new_val, image_shape=(448, 448), mode='file', output_path=dir+'new_val_448.h5', categorical_labels=True, normalize=False)
#
# #
# print 'Done creating new_val.h5'
#build_hdf5_image_dataset(new_test, image_shape=(448, 448), mode='file', output_path=dir+'new_test_224.h5', categorical_labels=True, normalize=False)
#print 'Done creating new_test.h5'
build_hdf5_image_dataset(new_train,
                         image_shape=(448, 448),
                         mode='file',
                         output_path=out_dir + 'new_train_448.h5',
                         categorical_labels=True,
                         normalize=False)
print('Done creating new_train_448.h5')
Example #14
import os

import pandas as pd
from tflearn.data_utils import build_hdf5_image_dataset

DATA_DIR = os.environ['HOME']

train = pd.read_csv(TRAIN_FILE)
# split into training and test

with open(DATASET_FILE, 'w') as f:
    for idx, row in training.iterrows():
        file_path = os.environ['HOME'] + '/' + row['file_path'].split('/')[1]
        folder = os.environ['HOME'] + '/' + row['file_path'].split('/')[0]
        if not os.path.isfile(file_path):
            continue
        # create folder if needed
        # crop image
        # write logic to toss out poorly formatted images
        line_string = '{} {}\n'.format(file_path, row['expression'])
        f.write(line_string)

dataset_file = 'train_dataset_trim.txt'
build_hdf5_image_dataset(dataset_file,
                         image_shape=(IMG_SIZE, IMG_SIZE),
                         mode='file',
                         output_path=TRAIN_H5,
                         normalize=True,
                         categorical_labels=True)

Example #15
def data_prep(conf, method='random', training_size=0.7, clean_start=False):
    '''
    create hdf5 files based on train and test selecting strategy
    and return results as train and test sets
    :param conf: parameters from configure file
    :param method: random / sequence; with sequence, the first training_size fraction of images is used for training
    :param training_size: portion of images that will be used in training
    :param clean_start: start from zero or build on previous work
    :return: X_train, Y_train, X_test, Y_test
    '''

    data_folder = conf['data_folder']
    if clean_start:
        os.remove(data_folder + 'train.h5')
        os.remove(data_folder + 'test.h5')
    # If the files are already available, just read and return them
    if os.path.isfile(data_folder + 'train.h5') and os.path.isfile(
            data_folder + 'test.h5'):
        _h5f = h5py.File(data_folder + 'train.h5', 'r')
        X_train = _h5f['X']
        Y_train = _h5f['Y']
        h5f_ = h5py.File(data_folder + 'test.h5', 'r')
        X_test = h5f_['X']
        Y_test = h5f_['Y']

        return X_train, Y_train, X_test, Y_test

    # if the file is not available, continue creating it
    test_counter = 0
    counter = 0
    train_file = data_folder + 'train.txt'
    test_file = data_folder + 'test.txt'
    with open(data_folder + 'labeling.csv') as inputFile:
        df = pd.read_csv(data_folder + 'labeling.csv')
        with open(train_file, 'w+') as trainFile:
            with open(test_file, 'w+') as testFile:
                for row in df.iterrows():
                    row = row[1]
                    counter += 1
                    test_counter += 1
                    line = data_folder + 'ercis/' + str(int(row['index'] - 1)).zfill(5) + '.tif' \
                        + ' ' + str(int(row['manual_label'])) + '\n'
                    if method == 'random':
                        if random.random() < training_size:
                            trainFile.write(line)
                        else:
                            testFile.write(line)
                            test_counter += 1
                    else:
                        if counter < 37204 * training_size:  # random.random() < train_size:
                            trainFile.write(line)
                        else:
                            testFile.write(line)
                            test_counter += 1

    # Build a HDF5 dataset (only required once)
    build_hdf5_image_dataset(train_file,
                             image_shape=(40, 40),
                             mode='file',
                             output_path=data_folder + 'train.h5',
                             categorical_labels=True,
                             normalize=True)
    build_hdf5_image_dataset(test_file,
                             image_shape=(40, 40),
                             mode='file',
                             output_path=data_folder + 'test.h5',
                             categorical_labels=True,
                             normalize=True)
    # Load HDF5 dataset
    _h5f = h5py.File(data_folder + 'train.h5', 'r')
    X_train = _h5f['X']
    Y_train = _h5f['Y']

    h5f_ = h5py.File(data_folder + 'test.h5', 'r')
    X_test = h5f_['X']
    Y_test = h5f_['Y']

    return X_train, Y_train, X_test, Y_test
Example #16
dataset_file = 'my_dataset.txt'

# Build a HDF5 dataset (only required once)
from tflearn.data_utils import build_hdf5_image_dataset

TRAIN_DATA = '../train_data'
VAL_DATA = '../val_data'
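
# NOTE: mode='file' expects a text file of "path label" lines; for directory
# trees like '../train_data', mode='folder' is the usual choice.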

build_hdf5_image_dataset(TRAIN_DATA,
                         image_shape=(128, 128),
                         mode='file',
                         output_path='dataset_train.h5',
                         categorical_labels=True,
                         normalize=True)
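
# The "only required once" note can be enforced with an existence check so
# later runs skip the rebuild (a minimal sketch; assumes the paths above):
import os
if not os.path.exists('dataset_train.h5'):
    build_hdf5_image_dataset(TRAIN_DATA, image_shape=(128, 128), mode='file',
                             output_path='dataset_train.h5',
                             categorical_labels=True, normalize=True)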
Example #17
from tflearn.data_utils import build_hdf5_image_dataset
import h5py

new_train = "train.txt"
new_val = "val.txt"
new_test = "test.txt"

# image_shape option can be set to different values to create images of different sizes
build_hdf5_image_dataset(new_val, image_shape=(50, 50), mode='file', output_path='new_val.h5', categorical_labels=True, normalize=False)
print('Done creating new_val.h5')
build_hdf5_image_dataset(new_test, image_shape=(50, 50), mode='file', output_path='new_test.h5', categorical_labels=True, normalize=False)
print('Done creating new_test.h5')
build_hdf5_image_dataset(new_train, image_shape=(50, 50), mode='file', output_path='new_train.h5', categorical_labels=True, normalize=False)
print('Done creating new_train.h5')
Example #18
from tflearn.data_utils import build_hdf5_image_dataset

#print(len(train))

print(tile_df[['cell_type_idx',
               'cell_type']].sort_values('cell_type_idx').drop_duplicates())

# sys.exit()  # debugging stop in the original; it would prevent the code below from running
with open('path_train.data', 'w') as pf:
    pf.writelines(train)
with open('path_val.data', 'w') as pf:
    pf.writelines(val)

build_hdf5_image_dataset(
    'path_train.data',
    image_shape=(224, 224),
    mode='file',
    output_path='train.h5',
    categorical_labels=True,
    normalize=True,
    grayscale=False,
)

build_hdf5_image_dataset(
    'path_val.data',
    image_shape=(224, 224),
    mode='file',
    output_path='val.h5',
    categorical_labels=True,
    normalize=True,
    grayscale=False,
)
Example #19
# Load path/class_id image file:
# dataset_origin_train = './data/original_jpg_structured_train-validation-only/'
# dataset_origin_test = './data/original_jpg_structured_test-only/'

dataset_train_hdf5 = 'coal70_train_dataset.h5'
dataset_test_hdf5 = 'coal70_test_dataset.h5'

# Build a HDF5 dataset (only required once)
from pathlib import Path

import h5py
from tflearn.data_utils import build_hdf5_image_dataset

dataset_hdf5_file = Path(dataset_train_hdf5)
if not dataset_hdf5_file.exists():
    print("Creating", dataset_train_hdf5)
    build_hdf5_image_dataset(args['dataset_origin_train'],
                             image_shape=(256, 256),
                             mode='folder',
                             output_path=dataset_train_hdf5,
                             categorical_labels=True,
                             normalize=True)

dataset_hdf5_file = Path(dataset_test_hdf5)
if not dataset_hdf5_file.exists():
    print("Creating", dataset_test_hdf5)
    build_hdf5_image_dataset(args['dataset_origin_test'],
                             image_shape=(256, 256),
                             mode='folder',
                             output_path=dataset_test_hdf5,
                             categorical_labels=True,
                             normalize=True)

# Load HDF5 dataset
h5f = h5py.File(dataset_train_hdf5, 'r')
Example #20
from tflearn.data_utils import build_hdf5_image_dataset
import h5py

new_train = "/home/meteo/zihao.chen/fine-grained-classifi/train_test/train_data"
new_val = "/home/meteo/zihao.chen/fine-grained-classifi/train_test/validation_data.txt"
new_test = "/home/meteo/zihao.chen/fine-grained-classifi/train_test/test_data.txt"

upload_test = '/home/meteo/zihao.chen/fine-grained-classifi/TEST_FIlE'
# image_shape option can be set to different values to create images of different sizes
# build_hdf5_image_dataset(new_val, image_shape=(448, 448), mode='file', output_path='new_val_448.h5', categorical_labels=True, normalize=False)
# print 'Done creating new_val_448.h5'
# build_hdf5_image_dataset(new_test, image_shape=(448, 448), mode='file', output_path='new_test_448.h5', categorical_labels=True, normalize=False)
# print 'Done creating new_test_448bc .h5'
# build_hdf5_image_dataset(new_train+'_%d.txt'%(0), image_shape=(224, 224), mode='file', output_path='new_train_224_%d.h5'% (0), categorical_labels=True, normalize=False)

for index in range(11):
    build_hdf5_image_dataset(upload_test + '/Test_%d.txt' % index,
                             image_shape=(448, 448),
                             mode='file',
                             output_path='TEST_FIlE/upload_test_448_%d.h5' %
                             index,
                             categorical_labels=True,
                             normalize=False)
    print('Done creating upload_test_448_%d.h5' % index)
Example #21
    lambda x: mode + '/image_' + str(x) + '.jpg')

filenames = filenames.values.astype(str)
labels = y.values.astype(int)
data = np.zeros(filenames.size, dtype=[('var1', 'S36'), ('var2', int)])
data['var1'] = filenames
data['var2'] = labels

np.savetxt(dataset_file, data, fmt="%10s %d")
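
# np.savetxt writes one "<path> <label>" line per image, the manifest format
# that mode='file' expects.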

output = mode + 'dataset.h5'

build_hdf5_image_dataset(dataset_file,
                         image_shape=(50, 50, 1),
                         mode='file',
                         output_path=output,
                         categorical_labels=True,
                         normalize=True,
                         grayscale=True)

# Load HDF5 dataset
h5f = h5py.File(output, 'r')  # read back the file written above
X_images = h5f['X']
Y_labels = h5f['Y'][:]

print(X_images.shape)
X_images = X_images[:, :, :].reshape([-1, 50, 50, 1])
print(X_images.shape)
h5f.close()

h5f = h5py.File('../data/' + mode + '.h5', 'w')
Example #22
def data_prep_(conf, method='seq', training_size=0.5, clean_start=False, hdfs=True):
    '''
    create hdf5 files based on train and test selecting strategy
    and return results as train and test sets
    :param conf: parameters from configure file
    :param method: random / sequence; with sequence, the first training_size fraction of images is used for training
    :param training_size: portion of images that will be used in training
    :param clean_start: start from zero or build on previous work
    :param hdfs: if True build/load HDF5 files, otherwise use image_preloader
    :return: X_train, Y_train, X_test, Y_test
    '''

    data_folder = conf['data_folder']
    if clean_start:
        os.remove(data_folder + 'train.h5')
        os.remove(data_folder + 'test.h5')
    # If the files are already available, just read and return them
    if os.path.isfile(data_folder + 'train.h5') and os.path.isfile(data_folder + 'test.h5'):
        _h5f = h5py.File(data_folder + 'train.h5', 'r')
        X_train = _h5f['X']
        Y_train = _h5f['Y']
        h5f_ = h5py.File(data_folder + 'validation.h5', 'r')
        X_test = h5f_['X']
        Y_test = h5f_['Y']

        return X_train, Y_train, X_test, Y_test

    # if the file is not available, continue creating it
    test_counter = 0
    counter = 0
    train_file = data_folder + 'train.txt'
    test_file = data_folder + 'test.txt'
    with open(data_folder + 'labeling.csv') as inputFile:
        df = pd.read_csv(data_folder + conf['data_frame'])
        with open(train_file, 'w+') as trainFile:
            with open(test_file, 'w+') as testFile:
                for f in os.listdir(data_folder + conf['cropped_folder']):
                    is_train = False
                    is_test = False
                    if method == 'random':
                        if random.random() < training_size:
                            is_train = True
                        else:
                            is_test = True
                    else:
                        if counter < len(df) * training_size:
                            is_train = True
                        else:
                            if random.random() < 0.25:
                                is_train = True
                            is_test = True

                    label = int(df.loc[int(f.split('.')[0].split('_')[0]) - 1, 'manual_label'])
                    line = data_folder + conf['cropped_folder'] + f  + ' ' + str(label) + '\n'
                    if is_train:
                        trainFile.write(line)
                    if is_test and '_' not in f:
                        testFile.write(line)

    if hdfs:
        # Build a HDF5 dataset (only required once)
        build_hdf5_image_dataset(train_file, image_shape=(40, 40), mode='file', output_path=data_folder + 'train.h5',
                                 categorical_labels=True, normalize=True)
        build_hdf5_image_dataset(test_file, image_shape=(40, 40), mode='file', output_path=data_folder + 'test.h5',
                                 categorical_labels=True, normalize=True)
        # Load HDF5 dataset
        _h5f = h5py.File(data_folder + 'train.h5', 'r')
        X_train = _h5f['X']
        Y_train = _h5f['Y']

        h5f_ = h5py.File(data_folder + 'test.h5', 'r')
        X_test = h5f_['X']
        Y_test = h5f_['Y']
    else:
        X_train, Y_train = image_preloader(data_folder +'train.txt', image_shape=(40, 40), mode='file',
                                           categorical_labels=True, normalize=True)
        X_test, Y_test = image_preloader(data_folder +'test.txt', image_shape=(40, 40), mode='file',
                                         categorical_labels=True,   normalize=True)


    return X_train, Y_train, X_test, Y_test
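
# Usage sketch: hdfs=False skips the HDF5 build and reads images lazily via
# image_preloader (hypothetical conf; the keys mirror those used above):
if __name__ == '__main__':
    conf = {'data_folder': '/path/to/data/',
            'data_frame': 'labeling.csv',
            'cropped_folder': 'cropped/'}
    X_train, Y_train, X_test, Y_test = data_prep_(conf, method='seq', hdfs=False)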
Example #23
from tflearn.data_utils import shuffle
from tflearn.layers.core import input_data, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression
from tflearn.data_preprocessing import ImagePreprocessing
from tflearn.data_augmentation import ImageAugmentation
from tflearn.layers.normalization import local_response_normalization
import h5py
from tflearn.data_utils import build_hdf5_image_dataset

dataset_file = 'train_data.txt'
validation_file = 'cval_data.txt'

build_hdf5_image_dataset(dataset_file,
                         image_shape=[144, 144],
                         mode='file',
                         output_path='dataset.h5',
                         categorical_labels=True)
h5f = h5py.File('dataset.h5', 'r')
X = h5f['X']
Y = h5f['Y']

build_hdf5_image_dataset(validation_file,
                         image_shape=[144, 144],
                         mode='file',
                         output_path='validation.h5',
                         categorical_labels=True)

h5f = h5py.File('validation.h5', 'r')
X_val = h5f['X']
Y_val = h5f['Y']
Example #24
def get_data(data_dir, hdf5):
    """This function loads in the data, either by loading images on the fly or by creating and
    loading from a hdf5 database.

    Args:
        data_dir: Root directory of the dataset.
        hdf5: Boolean. If true, (create and) load data from a hdf5 database.

    Returns:
        X: training images.
        Y: training labels.
        X_test: validation images.
        Y_test: validation labels."""

    # Get the filenames of the lists containing image paths and labels.
    train_file, val_file = build_dataset_index(data_dir)

    # Check if (creating and) loading from hdf5 database is desired.
    if hdf5:
        # Create folder to store dataset.
        if not os.path.exists('hdf5'):
            os.makedirs('hdf5')
        # Check if hdf5 databases already exist and create them if not.
        if not os.path.exists('hdf5/tiny-imagenet_train.h5'):
            from tflearn.data_utils import build_hdf5_image_dataset
            print('Creating hdf5 train dataset.')
            build_hdf5_image_dataset(train_file,
                                     image_shape=(64, 64),
                                     mode='file',
                                     output_path='hdf5/tiny-imagenet_train.h5',
                                     categorical_labels=True,
                                     normalize=True)

        if not os.path.exists('hdf5/tiny-imagenet_val.h5'):
            from tflearn.data_utils import build_hdf5_image_dataset
            print('Creating hdf5 val dataset.')
            build_hdf5_image_dataset(val_file,
                                     image_shape=(64, 64),
                                     mode='file',
                                     output_path='hdf5/tiny-imagenet_val.h5',
                                     categorical_labels=True,
                                     normalize=True)

        # Load training data from hdf5 dataset.
        h5f = h5py.File('hdf5/tiny-imagenet_train.h5', 'r')
        X = h5f['X']
        Y = h5f['Y']

        # Load validation data.
        h5f = h5py.File('hdf5/tiny-imagenet_val.h5', 'r')
        X_test = h5f['X']
        Y_test = h5f['Y']

    # Load images directly from disk when they are required.
    else:
        from tflearn.data_utils import image_preloader
        X, Y = image_preloader(train_file,
                               image_shape=(64, 64),
                               mode='file',
                               categorical_labels=True,
                               normalize=True,
                               filter_channel=True)
        X_test, Y_test = image_preloader(val_file,
                                         image_shape=(64, 64),
                                         mode='file',
                                         categorical_labels=True,
                                         normalize=True,
                                         filter_channel=True)

    # Randomly shuffle the dataset.
    X, Y = shuffle(X, Y)

    return X, Y, X_test, Y_test
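
# Usage sketch (hypothetical dataset root; hdf5=True creates the .h5 files on
# the first call and reuses them afterwards):
if __name__ == '__main__':
    X, Y, X_test, Y_test = get_data('data/tiny-imagenet-200', hdf5=True)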
Example #25
from tflearn.data_utils import build_hdf5_image_dataset
import h5py


path = '/home/smie/zhengjx/Res_Bilinear_cnns/raw_data/txt/'
filenum = 50
filename = 'train_data'
files = []
result = []
for i in range(0, filenum):
    files.append(path + filename + str(i) + '.txt')
    result.append(filename + str(i) + '.h5')
    build_hdf5_image_dataset(files[i], image_shape=(488, 488), mode='file', output_path=result[i], categorical_labels=True, normalize=False)
    print('Finish dataset ' + result[i])
Example #26
import sys
import csv

import tflearn
import tensorflow as tf
tf.reset_default_graph()
from tflearn.data_utils import load_csv

csv.field_size_limit(sys.maxsize)

# Load path/class_id image file:
dataset_file = "your file path"  # placeholder: list of "path label" lines

# Build a HDF5 dataset (only required once)
from tflearn.data_utils import build_hdf5_image_dataset
build_hdf5_image_dataset(
    dataset_file,
    image_shape=(250, 250),
    mode='file',
    output_path='/Users/coco/Desktop/CG_data/dataset_test.h5',
    categorical_labels=False,
    normalize=True)

# Load HDF5 dataset
import h5py
h5f = h5py.File('/Users/coco/Desktop/CG_data/dataset_test.h5', 'r')  # same path as output_path above
X = h5f['X']
Y = h5f['Y']

network = tflearn.input_data(shape=[None, 250, 250, 3])
network = tflearn.conv_2d(network,
                          64,
                          10,
                          4,
Example #27
import tflearn
from tflearn.data_utils import shuffle
from tflearn.layers.core import input_data, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression
from tflearn.data_preprocessing import ImagePreprocessing
from tflearn.data_augmentation import ImageAugmentation
from tflearn.layers.normalization import local_response_normalization
import h5py
from tflearn.data_utils import build_hdf5_image_dataset

dataset_file = 'train_data.txt'
validation_file = 'cval_data.txt'


build_hdf5_image_dataset(dataset_file, image_shape=[144, 144], mode='file', output_path='dataset.h5', categorical_labels=True)
h5f = h5py.File('dataset.h5', 'r')
X = h5f['X']
Y = h5f['Y']

build_hdf5_image_dataset(validation_file, image_shape=[144, 144], mode='file', output_path='validation.h5', categorical_labels=True)

h5f = h5py.File('validation.h5', 'r')
X_val = h5f['X']
Y_val = h5f['Y']

X, Y = shuffle(X, Y)

# Make sure the data is normalized
img_prep = ImagePreprocessing()
img_prep.add_featurewise_zero_center()
Example #28
# # image_shape option can be set to different values to create images of different sizes
# build_hdf5_image_dataset(new_val, image_shape=(224, 224), mode='file', output_path='new_val_224.h5', categorical_labels=True, normalize=False)
# print ('Done creating new_val.h5')
# build_hdf5_image_dataset(new_test, image_shape=(224, 224), mode='file', output_path='new_test_224.h5', categorical_labels=True, normalize=False)
# print ('Done creating new_test.h5')
# build_hdf5_image_dataset(new_train, image_shape=(488, 488), mode='file', output_path='new_train_488.h5', categorical_labels=True, normalize=False)
# print ('Done creating new_train_488.h5')

from tflearn.data_utils import build_hdf5_image_dataset

trainfile = "../data/train_data_crop/"
validfile = "../data/valid_data_crop/"
totaltrain = "../data/total_train_crop/"

# build_hdf5_image_dataset(trainfile, image_shape=(224, 224),
#                          mode='folder', output_path='../data/train.h5',
#                          categorical_labels=True, normalize=False, files_extension=['.jpg'])
# print('Done creating train.h5')
# build_hdf5_image_dataset(validfile, image_shape=(224, 224),
#                          mode='folder', output_path='../data/valid.h5',
#                          categorical_labels=True, normalize=False, files_extension=['.jpg'])
# print('Done creating valid.h5')

build_hdf5_image_dataset(totaltrain,
                         image_shape=(224, 224),
                         mode='folder',
                         output_path='../data/totaltrain_crop.h5',
                         categorical_labels=True,
                         normalize=False,
                         files_extension=['.jpg'])
print('Done creating totaltrain.h5')
Example #29
from tflearn.data_utils import image_preloader
from tflearn.data_utils import build_hdf5_image_dataset

# Cross subject dataset
dataset_train = r'D:\PythonProj\DataSet\Hand4/train'
dataset_test = r'D:\PythonProj\DataSet\Hand4/test'

# build_hdf5_image_dataset(dataset_train, image_shape=(32, 32), mode='folder', output_path='hand_train_32.h5', categorical_labels=True, normalize=True)
# build_hdf5_image_dataset(dataset_test, image_shape=(32, 32), mode='folder', output_path='hand_test_32.h5', categorical_labels=True, normalize=True)

# testing
# X, Y = image_preloader(dataset_file, image_shape=(32, 32), mode='folder', categorical_labels=True, normalize=True)
# # Load HDF5 dataset
# h5f = h5py.File('hand_train.h5', 'r')

# CrossValidation dataset
dataset_train = r'D:\PythonProj\DataSet\Hand4\CrossValidation/4/train'
dataset_test = r'D:\PythonProj\DataSet\Hand4\CrossValidation/4/test'

build_hdf5_image_dataset(dataset_train,
                         image_shape=(32, 32),
                         mode='folder',
                         output_path='hand_train_32_S4.h5',
                         categorical_labels=True,
                         normalize=True)
build_hdf5_image_dataset(dataset_test,
                         image_shape=(32, 32),
                         mode='folder',
                         output_path='hand_test_32_S4.h5',
                         categorical_labels=True,
                         normalize=True)
Example #30
from __future__ import division, print_function, absolute_import
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.estimator import regression
from tflearn.data_utils import build_hdf5_image_dataset
import h5py

dataset_train = 'train'
dataset_evaluate = r'test-boat\primotest.txt'
build_hdf5_image_dataset(dataset_train,
                         image_shape=(128, 38),
                         mode='folder',
                         output_path='dataset.h5',
                         categorical_labels=True,
                         normalize=True)
build_hdf5_image_dataset(dataset_evaluate,
                         image_shape=(128, 38),
                         mode='file',
                         output_path='evaluate.h5',
                         categorical_labels=True,
                         normalize=True)

h5f_t = h5py.File('dataset.h5', 'r')
X = h5f_t['X']
Y = h5f_t['Y']

h5f_e = h5py.File('evaluate.h5', 'r')
Xe = h5f_e['X']
Ye = h5f_e['Y']
Example #31
from tflearn.data_utils import build_hdf5_image_dataset
import h5py

trainset = "train"
testset = "test"
build_hdf5_image_dataset(testset,
                         image_shape=(224, 224),
                         mode='folder',
                         output_path='new_test.h5',
                         categorical_labels=True,
                         normalize=False)
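
# NOTE: the 'validation' file below is built from the same 'test' folder;
# point it at a separate directory if a distinct validation split exists.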

build_hdf5_image_dataset(testset,
                         image_shape=(224, 224),
                         mode='folder',
                         output_path='new_val.h5',
                         categorical_labels=True,
                         normalize=False)

print('Done creating new_test.h5')
build_hdf5_image_dataset(trainset,
                         image_shape=(224, 224),
                         mode='folder',
                         output_path='new_train.h5',
                         categorical_labels=True,
                         normalize=False)
print('Done creating new_train.h5')