import numpy as np

# DataSets, DataSet, load_pickled_data, and dense_to_one_hot are assumed
# to be defined elsewhere in the surrounding module.
def read_bills_data(train_dir, queried_idxs=None):
    data_sets = DataSets()
    train_x, train_y, train_ids, test_x, test_y, test_ids = load_pickled_data(
        train_dir)
    # Labels may be stored as dense class indices; convert to one-hot if so.
    try:
        np.shape(train_y)[1]
    except IndexError:
        train_y = dense_to_one_hot(train_y)
        test_y = dense_to_one_hot(test_y)
    if queried_idxs is not None:
        # Split the training pool into queried and unqueried subsets.
        all_train_idxs = np.arange(len(train_y))
        queried_docs = train_x[queried_idxs]
        queried_labels = train_y[queried_idxs]
        unqueried_idxs = np.setdiff1d(all_train_idxs, queried_idxs)

        remaining_docs = train_x[unqueried_idxs]
        remaining_labels = train_y[unqueried_idxs]

        data_sets.train = DataSet(queried_docs, queried_labels, queried_idxs)
        data_sets.unqueried = DataSet(remaining_docs, remaining_labels,
                                      unqueried_idxs)
    else:
        data_sets.train = DataSet(train_x, train_y, train_ids)
    data_sets.test = DataSet(test_x, test_y, test_ids)

    return data_sets
def load_unlabeled_corpus_111(train_dir):
    data_sets = DataSets()
    x, y, ids = load_pickled_corpus111_data(train_dir)
    # Convert dense integer labels to one-hot if they are 1-D.
    try:
        np.shape(y)[1]
    except IndexError:
        y = dense_to_one_hot(y.astype(int))
    data_sets.unlabeled = DataSet(x, y, ids)
    return data_sets
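# Both loaders above rely on dense_to_one_hot, which is not shown here.
# A minimal sketch, assuming labels arrive as a 1-D array of integer
# class indices:
def dense_to_one_hot(labels_dense, num_classes=None):
    labels_dense = np.asarray(labels_dense, dtype=int)
    if num_classes is None:
        num_classes = labels_dense.max() + 1
    # One row per example, with a single 1.0 in that example's class column.
    one_hot = np.zeros((len(labels_dense), num_classes))
    one_hot[np.arange(len(labels_dense)), labels_dense] = 1.0
    return one_hot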
Example #3
from os import path

# check_data, read_image, read_label, DataSet, Datasets, and the
# TRAIN_*/TEST_* filename constants are defined elsewhere in the module.
def read_data(base_dir,
              normalize=True,
              validation_size=5000,
              one_hot=False,
              flatten=True):
    check_data(base_dir)

    train_images = read_image(path.join(base_dir, TRAIN_IMAGES), flatten)
    test_images = read_image(path.join(base_dir, TEST_IMAGES), flatten)

    if normalize:
        # Scale pixel values from [0, 255] into [0, 1].
        train_images = train_images / 255.0
        test_images = test_images / 255.0

    train_labels = read_label(path.join(base_dir, TRAIN_LABELS), one_hot)
    # Hold out the first `validation_size` training examples for validation.
    validation = DataSet(images=train_images[:validation_size],
                         labels=train_labels[:validation_size])
    train = DataSet(images=train_images[validation_size:],
                    labels=train_labels[validation_size:])

    test_labels = read_label(path.join(base_dir, TEST_LABELS), one_hot)
    test = DataSet(images=test_images, labels=test_labels)

    return Datasets(train=train, validation=validation, test=test)
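# read_label is not shown here. A minimal sketch, assuming the standard
# MNIST IDX label file layout (4-byte magic, 4-byte count, one byte per
# label) and 10 digit classes; both are assumptions, not part of the
# original:
import numpy as np

def read_label(filepath, one_hot=False):
    with open(filepath, 'rb') as f:
        f.read(4)                                 # magic number
        count = int.from_bytes(f.read(4), 'big')  # number of labels
        labels = np.frombuffer(f.read(count), dtype=np.uint8)
    if one_hot:
        return np.eye(10, dtype=np.float32)[labels]
    return labels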
Example #4
def load_unlabeled_bills(train_dir):
    data_sets = DataSets()
    train_x, train_y, train_ids, test_x, test_y, test_ids = load_pickled_data(
        train_dir)
    # Convert dense integer labels to one-hot if they are 1-D.
    try:
        np.shape(train_y)[1]
    except IndexError:
        train_y = dense_to_one_hot(train_y.astype(int))
        test_y = dense_to_one_hot(test_y.astype(int))
    data_sets.unlabeled = DataSet(train_x, train_y, train_ids)
    # data_sets.unlabeled = DataSet(test_x, test_y, test_ids)
    # data_sets.train = DataSet(train_x, train_y, train_ids)
    # data_sets.test = DataSet(test_x, test_y, test_ids)
    return data_sets
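# load_pickled_data is not shown in any of these examples. A minimal
# sketch, assuming a single pickle in train_dir holds the six arrays in
# the order unpacked above (the "bills.pkl" file name is hypothetical):
import os
import pickle

def load_pickled_data(train_dir):
    with open(os.path.join(train_dir, "bills.pkl"), "rb") as f:
        # (train_x, train_y, train_ids, test_x, test_y, test_ids)
        return pickle.load(f)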
Example #5
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import utils.plot as plot
from utils.preprocessing import *
from data.DigitSet import DigitSet
from data.DataSet import DataSet
import os

#%% Load the full dataset and apply normalization
folder = os.path.join("files", "dataset")
dataset = DataSet(folder)
dataset.apply(apply_mean_centering)
dataset.apply(apply_unit_distance_normalization)
dataset.apply(lambda x: normalize_pressure_value(x, 512))

#%% Load a single digit set and normalize it the same way
filename = os.path.join(folder, "10.43_23.03.2018_digitset.json")
digitset = DigitSet(filename)
scaled = digitset.copy()
# Apply the same normalization pipeline used for the full dataset
scaled.apply(apply_mean_centering)
scaled.apply(apply_unit_distance_normalization)
scaled.apply(lambda x: normalize_pressure_value(x, 512))
# Convert per-point time deltas (dt) into absolute timestamps (t) if needed
if scaled.time_is_dt():
    scaled.convert_dt_to_t()

#%% Plot one digit from the set
digit, label = digitset[6]
plot.show_digit(digit, label=label,
                show_lines=True, show_points=True)
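#%%
# apply_mean_centering is pulled in by the star import above. A minimal
# sketch, assuming each digit is a 2-D array whose first two columns are
# the (x, y) pen coordinates (the column layout is an assumption):
import numpy as np

def apply_mean_centering(digit):
    digit = np.asarray(digit, dtype=float).copy()
    # Shift the stroke so its centroid sits at the origin.
    digit[:, :2] -= digit[:, :2].mean(axis=0)
    return digit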
Example #6
# -*- coding: utf-8 -*-

# allow the notebook to access the parent directory so we can import the other modules
# https://stackoverflow.com/a/35273613
import os
import sys
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

dataset_folder_path = os.path.join("files", "dataset")

#%%
from data.DataSet import DataSet
dataset = DataSet()
dataset.load(dataset_folder_path,
             test_set_percentage=0.2,
             validation_set_percentage=0.3333)

print("Training Data Len:", len(dataset.train_data))
print("Validation Data Len:", len(dataset.valid_data))
print("Test Data Len:", len(dataset.test_data))

#%% Load Model
from keras.models import load_model

TRAINED_MODEL = os.path.join("files", "checkpoints", "1525696834.4091375",
                             "regularized_3x512_gru-30-0.97.hdf5")
model = load_model(TRAINED_MODEL)

#%%
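# Hypothetical follow-up cell, not part of the original: evaluate the
# restored GRU on the test split. Assumes dataset.test_data yields
# (sequence, one_hot_label) pairs already padded to the model's input
# shape, and that the model was compiled with a single accuracy metric.
import numpy as np

x_test = np.array([digit for digit, _ in dataset.test_data])
y_test = np.array([label for _, label in dataset.test_data])
loss, acc = model.evaluate(x_test, y_test, verbose=0)
print("Test accuracy: %.3f" % acc)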
Example #7
#encoding=utf-8
import numpy as np
#import algo.knn.knn
from data.DataSet import DataSet
from data.DigitDataSet import DigitDataSet

# A data set can be backed by either a single file or a whole folder.
dataset = DataSet(file_path="gaofeng_file")
dataset2 = DataSet(folder_path="gaofeng_folder")

digit_dataset = DigitDataSet(file_path="gaofeng_file")
digit_dataset2 = DigitDataSet(folder_path="gaofeng_folder")