import math

import hppi

def compare_dataset(dataset1_path, dataset2_path):
  """Compare two HPPI datasets element-wise and report differing cells."""
  print("dataset1: ", dataset1_path)
  print("dataset2: ", dataset2_path)
  dataset1 = hppi.read_data_sets(dataset1_path)
  dataset2 = hppi.read_data_sets(dataset2_path)
  datas1 = dataset1.datas
  datas2 = dataset2.datas
  is_equal = (datas1==datas2)
  not_equal_locations = [(row, column) for row, x in enumerate(is_equal) for column, y in enumerate(x) if not y]
  max_diff = 0
  print("not_equal_locations:")
  for row, column in not_equal_locations:
    print("%6s,%6s: %s,%s"%(row, column, datas1[row][column], datas2[row][column]))
    max_diff = max(max_diff, math.fabs(datas1[row][column]-datas2[row][column]))
  print("max_diff: ", max_diff)
Example #2
import hppi
import pandas

def load_hppids(dir):
    hppids = hppi.read_data_sets(dir, one_hot=False)
    X = hppids.datas
    Y = hppids.labels
    print('Successfully loaded', dir, '- shape:', X.shape)

    return pandas.DataFrame(X), pandas.DataFrame(Y)
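
A hypothetical call (the directory name is illustrative):

X_df, Y_df = load_hppids("data/02-ct-bin")
print(X_df.shape, Y_df.shape)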
Example #3
def main():

    import numpy as np
    from keras.wrappers.scikit_learn import KerasClassifier

    # create_model and SEED are defined elsewhere in the original module.
    model = KerasClassifier(build_fn=create_model,
                            input_dim=686,
                            hidden_units=[256, 256, 256],
                            kernel_initializer='uniform',
                            activation='relu',
                            dropout_rate=0.4,
                            loss='binary_crossentropy',
                            optimizer='adam',
                            metrics=['accuracy'],
                            epochs=50,
                            batch_size=128)

    import hppi

    hppids = hppi.read_data_sets("data/02-ct-bin", one_hot=False)
    X = hppids.datas
    Y = hppids.labels

    from sklearn.model_selection import StratifiedKFold, cross_val_score

    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED)
    results = cross_val_score(model, X, Y, cv=kfold)

    print(np.average(results))
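
KerasClassifier forwards its extra keyword arguments to build_fn, so create_model must accept them. Its definition is not shown here; the following is a minimal sketch of a plausible binary-classification MLP matching the parameters above, not the original implementation:

def create_model(input_dim, hidden_units, kernel_initializer,
                 activation, dropout_rate, loss, optimizer, metrics):
    from keras.models import Sequential
    from keras.layers import Dense, Dropout

    model = Sequential()
    # First hidden layer carries the input dimension.
    model.add(Dense(hidden_units[0], input_dim=input_dim,
                    kernel_initializer=kernel_initializer,
                    activation=activation))
    model.add(Dropout(dropout_rate))
    # Remaining hidden layers.
    for units in hidden_units[1:]:
        model.add(Dense(units, kernel_initializer=kernel_initializer,
                        activation=activation))
        model.add(Dropout(dropout_rate))
    # Single sigmoid output unit to pair with binary_crossentropy.
    model.add(Dense(1, kernel_initializer=kernel_initializer,
                    activation='sigmoid'))
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    return model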
Example #4
import numpy as np

import hppi

def load_test_data(data_path):

    hppids = hppi.read_data_sets(data_path, one_hot=True)

    inp_dims = len(hppids.test.datas[0])

    test_datas = np.reshape(hppids.test.datas,
                            (len(hppids.test.datas), 1, inp_dims))

    return test_datas, hppids.test.labels
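
The reshape gives every sample shape (1, inp_dims), i.e. a single timestep of inp_dims features, the layout a sequence model with sequence length 1 expects. A hypothetical call:

test_datas, test_labels = load_test_data("data/09-hppids")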
Example #5
import hppi

def load_data_sets(data_sets_dir):
    hppids = hppi.read_data_sets(data_sets_dir, one_hot=False)
    train_datas, train_labels, test_datas, test_labels = hppids.shuffle(
    ).split()

    # train_datas  = train_datas [:100]
    # train_labels = train_labels[:100]
    # test_datas   = test_datas  [:100]
    # test_labels  = test_labels [:100]

    return train_datas, train_labels, test_datas, test_labels
Example #6
import numpy as np

import hppi

def load_train_data(data_path):

    hppids = hppi.read_data_sets(data_path, one_hot=True)

    train_length, train_datas, train_labels, valid_length, valid_datas, valid_labels = hppids.train.shuffle(
    ).split(ratio=0.8)

    inp_dims = len(train_datas[0])

    train_datas = np.reshape(train_datas, (len(train_datas), 1, inp_dims))
    valid_datas = np.reshape(valid_datas, (len(valid_datas), 1, inp_dims))

    return train_length, train_datas, train_labels, valid_length, valid_datas, valid_labels, inp_dims
from datetime import datetime

from sklearn.metrics import (auc, average_precision_score, log_loss,
                             recall_score, roc_curve)

import hppi

def train_and_test(data_sets_dir, classifier):
    # Load datasets.
    hppids = hppi.read_data_sets(data_sets_dir, one_hot=False)
    train_datas, train_labels, test_datas, test_labels = hppids.shuffle(
    ).split()

    # train_datas  = train_datas [:100]
    # train_labels = train_labels[:100]
    # test_datas   = test_datas  [:100]
    # test_labels  = test_labels [:100]

    # train
    begin_time = datetime.now()
    classifier.fit(train_datas, train_labels)
    end_time = datetime.now()
    train_time = (end_time - begin_time).total_seconds()

    # test
    begin_time = datetime.now()
    mean_accuracy = classifier.score(test_datas, test_labels)
    end_time = datetime.now()
    test_time = (end_time - begin_time).total_seconds()

    # predict
    begin_time = datetime.now()
    prediction = classifier.predict(test_datas)
    # confusion_matrix(test_labels, prediction)
    end_time = datetime.now()
    predict_time = (end_time - begin_time).total_seconds()

    fpr, tpr, thresholds = roc_curve(test_labels, prediction)

    return (
        mean_accuracy,
        auc(fpr, tpr),
        average_precision_score(test_labels, prediction),
        recall_score(test_labels, prediction),
        log_loss(test_labels, prediction),
        train_time,
        test_time,
        predict_time,
    )
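
A hypothetical run with a scikit-learn estimator; the classifier choice and path are illustrative:

from sklearn.ensemble import RandomForestClassifier

metrics = train_and_test("data/02-ct-bin", RandomForestClassifier(n_estimators=100))
print(metrics)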
"""
A Bi-directional Recurrent Neural Network (LSTM) implementation example using
TensorFlow library.

Author: Gui Yuanmiao
Project: https://github.com/smalltalkman/hppi-tensorflow/
"""

from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np

# Import HPPI data
import os, hppi
hppids = hppi.read_data_sets(os.getcwd() + "/data/09-hppids", one_hot=True)
'''
Each flat HPPI feature vector (14*79 = 1106 values) is treated as a sequence
of 79 steps with 14 inputs each, mirroring the way the original MNIST example
treats every image row as a sequence of pixels.
'''

# Training Parameters
learning_rate = 0.001
training_steps = 10000
batch_size = 128
display_step = 200

# Network Parameters
num_input = 14  # HPPI data input (data shape: 14*79=1106)
timesteps = 79  # sequence length per sample
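
# Not part of the original snippet: a sketch of how a flat training batch
# would be reshaped before being fed to the BiRNN (next_batch is assumed to
# mirror the MNIST DataSet API that hppi.read_data_sets emulates):
#
#   batch_x, batch_y = hppids.train.next_batch(batch_size)
#   batch_x = batch_x.reshape((batch_size, timesteps, num_input))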
Example #9
from datetime import datetime

import tensorflow as tf

import hppi

def main():
    # data_sets_dir, num_input, hidden_units, num_classes, optimizer, dropout,
    # model_dir, batch_size, num_steps and result_file are module-level
    # settings defined elsewhere in the original script.

    # Load datasets.
    hppids = hppi.read_data_sets(data_sets_dir, one_hot=False)

    # Specify that all features have real-value data
    feature_columns = [
        tf.feature_column.numeric_column("x", shape=[num_input])
    ]

    # Build a DNN with the configured hidden units.
    classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        # input_layer_partitioner=None,
        # hidden_units=[10, 20, 10],
        # hidden_units=[256, 256, 256],
        hidden_units=hidden_units,
        # activation_fn=tf.nn.relu,
        n_classes=num_classes,
        # optimizer='Adagrad',
        # optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate),
        optimizer=optimizer,
        dropout=dropout,
        model_dir=model_dir)
    # Define the training inputs
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": hppids.train.datas},
        y=hppids.train.labels,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True,
        queue_capacity=hppids.train.length)

    # Train model.
    begin_time = datetime.now()
    classifier.train(input_fn=train_input_fn, steps=num_steps)
    end_time = datetime.now()
    # Normalize to seconds per 100 training steps.
    train_time = (end_time - begin_time).total_seconds() / num_steps * 100

    # Define the test inputs
    test_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": hppids.test.datas},
        y=hppids.test.labels,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)

    # Evaluate accuracy.
    #accuracy_score = classifier.evaluate(input_fn=test_input_fn)["accuracy"]
    # Evaluate scores.
    begin_time = datetime.now()
    scores = classifier.evaluate(input_fn=test_input_fn)
    end_time = datetime.now()
    test_time = (end_time - begin_time).total_seconds()

    scores_str =   "global_step = {0:08d}".format(scores["global_step"]) \
               + ", accuracy = {0:8g}".format(scores["accuracy"]) \
               + ", accuracy_baseline = {0:8g}".format(scores["accuracy_baseline"]) \
               + ", auc = {0:8g}".format(scores["auc"]) \
               + ", auc_precision_recall = {0:8g}".format(scores["auc_precision_recall"]) \
               + ", average_loss = {0:8g}".format(scores["average_loss"]) \
               + ", label/mean = {0:8g}".format(scores["label/mean"]) \
               + ", loss = {0:8g}".format(scores["loss"]) \
               + ", prediction/mean = {0:8g}".format(scores["prediction/mean"]) \
               + ", train_time = {0:8g}".format(train_time) \
               + ", test_time = {0:8g}".format(test_time) \

    #print("\nTest Accuracy: {0:f}\n".format(accuracy_score))
    print("\nTest scores: {0}\n".format(scores_str))

    with open(result_file, "a") as file:
        file.write(scores_str + "\n")
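
main() reads its configuration from module-level names. A hypothetical set of values for illustration only (num_input = 1106 follows from the 14*79 data shape noted above; everything else is a guess):

data_sets_dir = "data/02-ct-bin"
num_input     = 1106
hidden_units  = [256, 256, 256]
num_classes   = 2
optimizer     = 'Adagrad'
dropout       = 0.4
model_dir     = "model/dnn"
batch_size    = 128
num_steps     = 10000
result_file   = "result/dnn.txt"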
def once(data_sets_dir, data_sets_info
       , num_input, hidden_units, activation_fn, num_classes, optimizer, learning_rate, dnn_info
       , num_steps
       , model_dir_root
       , result_dir_root
       ):
  model_info = "_{0}({1:d}x{2:d})_{3}_{4}_{5:g}".format(
                 data_sets_info
               , num_input
               , num_classes
               , 'x'.join([str(n) for n in hidden_units])
               , dnn_info
               , learning_rate
               )
  model_dir = model_dir_root+model_info
  result_file = result_dir_root+model_info+".txt"

  # Load datasets.
  hppids = hppi.read_data_sets(data_sets_dir, one_hot=False)
  hppids.shuffle().split(apply=True)

  # Specify that all features have real-value data
  feature_columns = [tf.feature_column.numeric_column("x", shape=[num_input])]

  # Build a DNN with the configured hidden units.
  classifier = tf.estimator.DNNClassifier(feature_columns=feature_columns,
                                          # input_layer_partitioner=None,
                                          # hidden_units=[10, 20, 10],
                                          hidden_units=hidden_units,
                                          # activation_fn=tf.nn.relu,
                                          activation_fn=activation_fn,
                                          n_classes=num_classes,
                                          # optimizer='Adagrad',
                                          optimizer=optimizer(learning_rate=learning_rate),
                                          model_dir=model_dir)
  # Define the training inputs
  train_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={"x": hppids.train.datas},
      y=hppids.train.labels,
      num_epochs=None,
      shuffle=True,
      queue_capacity=hppids.train.length)

  # Train model.
  classifier.train(input_fn=train_input_fn, steps=num_steps)

  # Define the test inputs
  test_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={"x": hppids.test.datas},
      y=hppids.test.labels,
      num_epochs=1,
      shuffle=False)

  # Evaluate accuracy.
  #accuracy_score = classifier.evaluate(input_fn=test_input_fn)["accuracy"]
  # Evaluate scores.
  scores = classifier.evaluate(input_fn=test_input_fn)
  scores_str =   "global_step = {0:08d}".format(scores["global_step"]) \
             + ", accuracy = {0:8g}".format(scores["accuracy"]) \
             + ", accuracy_baseline = {0:8g}".format(scores["accuracy_baseline"]) \
             + ", auc = {0:8g}".format(scores["auc"]) \
             + ", auc_precision_recall = {0:8g}".format(scores["auc_precision_recall"]) \
             + ", average_loss = {0:8g}".format(scores["average_loss"]) \
             + ", label/mean = {0:8g}".format(scores["label/mean"]) \
             + ", loss = {0:8g}".format(scores["loss"]) \
             + ", prediction/mean = {0:8g}".format(scores["prediction/mean"]) \

  #print("\nTest Accuracy: {0:f}\n".format(accuracy_score))
  print("\nTest scores: {0}\n".format(scores_str))

  with open(result_file, "a") as file:
    file.write(scores_str+"\n")
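
A hypothetical invocation of once(); every argument value below is illustrative:

once("data/02-ct-bin", "ct-bin"
   , num_input=1106, hidden_units=[256, 256, 256], activation_fn=tf.nn.relu
   , num_classes=2, optimizer=tf.train.AdamOptimizer, learning_rate=0.001
   , dnn_info="relu_adam"
   , num_steps=10000
   , model_dir_root="model/dnn"
   , result_dir_root="result/dnn"
   )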