Python read_data_sets 예제들, load_data.read_data_sets Python 예제들

예제 #1

0

파일 보기

    # 6. Output
    _O = [tf.matmul(x, _W['out']) + _b['out'] for x in _LSTM_O]
    _O = tf.pack(_O)
    # Return!
    return {
        'X': _X,
        'H': _H,
        'Hsplit': _Hsplit,
        'LSTM_O': _LSTM_O,
        'LSTM_S': _LSTM_S,
        'O': _O
    }


# Load MNIST, our beloved friend
mnist = load_data.read_data_sets("/home/a/workspace/ssd/DataSets/mnist_2/",\
                         "/home/a/workspace/ssd/DataSets/mnist_2_test/",one_hot=False)
trainimgs, trainlabels, testimgs, testlabels = mnist.train.images,\
                                               mnist.train.labels,\
                                               mnist.test.images,\
                                               mnist.test.labels

ntrain, ntest, dim, nclasses \
 = trainimgs.shape[0], testimgs.shape[0], trainimgs.shape[1], trainlabels.shape[1]
print "ntrain:  ", ntrain
print "dim:     ", dim
print "nclasses: ", nclasses
nclasses = 11

print("MNIST loaded")

# Training params

예제 #2

0

파일 보기

파일: lstm_ctc_mnist.py 프로젝트: AdamShum/CaptchaRecognition

    outputs, final_state = tf.nn.dynamic_rnn(lstm_cell, X_in, initial_state=init_state, time_major=False)

 

    outputs = tf.unstack(tf.transpose(outputs, [1,0,2]))
    results=[]
    for i in range(len(outputs)) : 
        result = tf.matmul(outputs[i], weights['out']) + biases['out']    # shape = (128, 10)
        results.append(result)
    return results
    
  

# Read the data set through the project's load_data module.
mnist = load_data.read_data_sets("1", "2", one_hot=False)
trainimgs = mnist.train.images
trainlabels = mnist.train.labels
testimgs = mnist.test.images
testlabels = mnist.test.labels

nclasses = 36

print ("MNIST loaded")

# Training params
training_epochs, batch_size, display_step = 4000, 1, 1
learning_rate = 0.001

예제 #3

0

파일 보기

import load_data
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Fix NumPy's global random seed so repeated runs are comparable.
np.random.seed(100)
# read_data_sets returns the project's data-set container
data = load_data.read_data_sets(one_hot=True)

# get train data and labels in a single batch of 84420 samples
train_x, train_label = data.train.next_batch(84420)

# get test data
test_x = data.test.data

# get test labels
test_labels = data.test.labels
# get sample number
n_samples = data.train.num_examples
# use knn (3 neighbours) for classification
knn = KNeighborsClassifier(n_neighbors=3)
# train the model
knn.fit(train_x, train_label)
# predict the values
y_pred = knn.predict(test_x)
# print accuracy as a percentage; accuracy_score's signature is
# (y_true, y_pred), so the ground-truth labels are passed first
print("Accuracy", accuracy_score(test_labels, y_pred) * 100)

예제 #4

0

파일 보기

파일: predict.py 프로젝트: qwzhong1988/lstm_learn


def parse_args1():
    """Parse the command line and return the resulting namespace.

    A single positional argument, ``pr``, is required.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``; the
        positional value is available as ``.pr``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("pr")
    parsed = cli.parse_args()
    return parsed


args1 = parse_args1()
# Read the data set through the project's load_data module; both path
# arguments are the same directory, selected by the "pr" CLI argument.
mnist = load_data.read_data_sets(
    "/mnt/d/workspace/ubuntu/workspace/ocr/" + args1.pr,
    "/mnt/d/workspace/ubuntu/workspace/ocr/" + args1.pr,
    one_hot=True, validation_size=1)
trainimgs = mnist.train.images
trainlabels = mnist.train.labels
testimgs = mnist.test.images
testlabels = mnist.test.labels
# Sizes are taken straight from the array shapes.
ntrain = trainimgs.shape[0]
ntest = testimgs.shape[0]
dim = trainimgs.shape[1]
nclasses = trainlabels.shape[1]
print("MNIST loaded")

# Recurrent neural network
diminput, dimhidden, nsteps = 28, 128, 28
dimoutput = nclasses
weights = {
    'hidden': tf.Variable(tf.random_normal([diminput, dimhidden])),

예제 #5

0

파일 보기

파일: truth.py 프로젝트: qwzhong1988/lstm_learn

def sparse_tuple_from(sequences, dtype=np.int32):
    """Create a sparse representation of ``sequences``.

    Args:
        sequences: a list in which only the first item of every element
            (``seq[0]``) is read; that item must expose ``.shape`` and be
            indexable, e.g. a 1-D NumPy array of labels.
        dtype: dtype of the returned ``values`` array.

    Returns:
        A tuple ``(indices, values, shape)`` of NumPy arrays:
        ``indices`` is int64 with one ``(row, column)`` pair per position of
        every ``seq[0]``, ``values`` has the requested ``dtype`` and ``shape``
        is int64 ``[len(sequences), longest row length]``.  An empty
        ``sequences`` yields ``(0, 2)``-shaped indices, no values and a
        ``[0, 0]`` shape instead of raising.
    """
    indices = []
    values = []

    for n, seq in enumerate(sequences):
        labels = seq[0]
        length = labels.shape[0]
        # range (rather than xrange) keeps this runnable on Python 2 and 3.
        indices.extend(zip([n] * length, range(length)))
        # NOTE(review): the first label is appended twice, regardless of
        # ``length``.  indices and values therefore only line up when every
        # row has exactly two entries, and the second label is never used.
        # Kept as-is because callers may rely on it -- confirm the intent.
        values.extend([labels[0]])
        values.extend([labels[0]])

    # reshape(-1, 2) is a no-op for non-empty input and gives the empty case
    # a well-formed (0, 2) array.
    indices = np.asarray(indices, dtype=np.int64).reshape(-1, 2)
    values = np.asarray(values, dtype=dtype)
    max_len = indices[:, 1].max() + 1 if len(indices) else 0
    shape = np.asarray([len(sequences), max_len], dtype=np.int64)

    return indices, values, shape

def _RNN(_X, _istate, _W, _b, _nsteps, _name):
    """Build the input projection -> LSTM -> output projection graph.

    Relies on the module-level ``diminput`` and ``dimhidden`` settings.

    Args:
        _X: input tensor; per the original author's notes it is laid out as
            [batchsize, nsteps, diminput].
        _istate: initial state handed to the recurrent network.
        _W: dict with the 'hidden' and 'out' weight matrices.
        _b: dict with the 'hidden' and 'out' bias vectors.
        _nsteps: number of chunks the hidden activations are split into.
        _name: variable scope under which the LSTM cell is created.

    Returns:
        A dict with the intermediate and final tensors under the keys
        'X', 'H', 'Hsplit', 'LSTM_O', 'LSTM_S' and 'O'.
    """
    # Swap the first two axes, then flatten everything but the feature axis
    # so one matmul projects every row at once.
    swapped = tf.transpose(_X, [1, 0, 2])
    flat_input = tf.reshape(swapped, [-1, diminput])

    # Input layer => hidden layer.
    hidden = tf.matmul(flat_input, _W['hidden']) + _b['hidden']

    # Cut the hidden activations into `_nsteps` pieces along axis 0; the
    # recurrent network consumes them as a list.
    step_inputs = tf.split(0, _nsteps, hidden)

    with tf.variable_scope(_name):
        cell = tf.nn.rnn_cell.BasicLSTMCell(dimhidden, forget_bias=1.0)
        lstm_outputs, lstm_state = tf.nn.rnn(
            cell, step_inputs, initial_state=_istate)

    # Project every per-step LSTM output, then stack the results into a
    # single tensor.
    projected = []
    for step_output in lstm_outputs:
        projected.append(tf.matmul(step_output, _W['out']) + _b['out'])
    stacked = tf.pack(projected)

    return {
        'X': flat_input,
        'H': hidden,
        'Hsplit': step_inputs,
        'LSTM_O': lstm_outputs,
        'LSTM_S': lstm_state,
        'O': stacked,
    }

# Load MNIST, our beloved friend
#mnist = input_data.read_data_sets("../../DataSets",one_hot=False)
mnist = load_data.read_data_sets("/home/a/workspace/ssd/DataSets/mnist_2/",\
                      "/home/a/workspace/ssd/DataSets/mnist_2/",one_hot=False)
trainimgs, trainlabels, testimgs, testlabels = mnist.train.images,\
                                               mnist.train.labels,\
                                               mnist.test.images,\
                                               mnist.test.labels

ntrain, ntest, dim = trainimgs.shape[0],\
                     testimgs.shape[0],\
                     trainimgs.shape[1],\
                     #trainlabels.shape[1]
nclasses = 10
print "ntrain:    ",ntrain
print "ntest :    ",ntest
print "dim:       ",dim
print "classes:   ",trainlabels.shape

print ("MNIST loaded")

# Training params
training_epochs = 1500
batch_size = 10
display_step = 1
learning_rate = 0.01

# Recurrent neural network params
diminput = 28
dimhidden = 128
# here we add the blank label
dimoutput = nclasses + 1
nsteps = 28

# Build the whole model (parameters, placeholders, RNN, CTC loss, optimizer
# and error metric) inside a dedicated graph that the session below runs.
graph = tf.Graph()
with graph.as_default():
    # Randomly initialised projection parameters: input -> hidden and
    # hidden -> output (dimoutput includes the blank label).
    weights = {
        'hidden': tf.Variable(tf.random_normal([diminput, dimhidden])),
        'out': tf.Variable(tf.random_normal([dimhidden, dimoutput]))
    }
    biases = {
        'hidden': tf.Variable(tf.random_normal([dimhidden])),
        'out': tf.Variable(tf.random_normal([dimoutput]))
    }


    #**************************************************
    # Placeholders that feed the RNN and the CTC loss.
    x = tf.placeholder(tf.float32, [None, nsteps, diminput])
    # Dynamic shape of x, fetched later purely for logging.
    shape_x = tf.shape(x)

    istate = tf.placeholder(tf.float32, [None, 2*dimhidden]) # LSTM state and cell side by side => 2 x dimhidden
    # Sparse target labels; fed with the (indices, values, shape) triple
    # produced by sparse_tuple_from.
    y = tf.sparse_placeholder(tf.int32)
    # 1-D array of size [batch_size].
    # seq_len gives the amount of real data in each input: batches are
    # zero-padded so that they fit into one matrix.
    seq_len = tf.placeholder(tf.int32, [None])

    myrnn = _RNN(x, istate, weights, biases, nsteps, 'basic')
    pred = myrnn['O']
    # Author's note: pred is [nsteps, batch_size, num_classes], e.g. [28 25 11].
    shape_Pred = tf.shape(pred)
    #**************************************************
    # CTC loss on the stacked per-step outputs.
    
    loss = ctc.ctc_loss(pred, y, seq_len)
    # Dynamic shape of the sparse labels, fetched later purely for logging.
    shape_y = tf.shape(y)

    # Scalar training objective: mean of the CTC loss.
    cost = tf.reduce_mean(loss)

    # Adam Optimizer
    optm = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    # Decode the best path (greedy CTC decoding).
    decoded, log_prob = ctc.ctc_greedy_decoder(pred, seq_len)


    # Despite its name, accr is the mean edit distance between the decoded
    # labels and y; the training loop prints it as a "label error rate".
    accr = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), y))
    init = tf.initialize_all_variables()
    print ("Network Ready!")


# Training driver: runs `training_epochs` passes over the training set and,
# every `display_step` epochs, prints the average cost plus the label error
# rate on the last training batch and on the full test set.
with tf.Session(graph=graph) as sess:
    sess.run(init)
    # Summary writer pointed at ./logs/ with the session graph; it is not
    # referenced again in this block.
    summary_writer = tf.train.SummaryWriter('./logs/', graph=sess.graph)
    print ("Start optimization")
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Reshape each sample into nsteps rows of diminput values.
            batch_xs = batch_xs.reshape((batch_size, nsteps, diminput))
            #print "shape of batch_xs is :     ",batch_xs.shape
            # Fit training using batch data: labels go in as a sparse triple,
            # the initial state is all zeros and every sequence is declared
            # to be nsteps long.
            feed_dict={x: batch_xs, y: sparse_tuple_from([[value] for value in batch_ys]), \
                        istate: np.zeros((batch_size, 2*dimhidden)), \
                        seq_len: [nsteps for _ in xrange(batch_size)]}

            _, batch_cost = sess.run([optm, cost], feed_dict=feed_dict)
            # Accumulate the cost weighted by batch size; it is normalised
            # by the number of training images after the loop.
            avg_cost += batch_cost*batch_size
            #print "COST_pred shape is :",pred.shape
        avg_cost /= len(trainimgs)
        # Display logs per epoch step
        if epoch % display_step == 0:
            print ("Epoch: %03d/%03d cost: %.9f" % (epoch, training_epochs, avg_cost))


            # feed_dict still holds the LAST training batch of this epoch,
            # so the "training" error rate is measured on that batch only.
            train_acc = sess.run(accr, feed_dict=feed_dict)
            
            #prediction = sess.run(decoded[0],feed_dict=feed_dict)
            print "  "
            print "**************************XXXXXXXXXXXXXXXXX: "
            print "Shape of x is :   ",sess.run(shape_x,feed_dict=feed_dict)
            print "Shape of y is :   ",sess.run(shape_y,feed_dict=feed_dict)
            print "Shape of pred :   ",sess.run(shape_Pred,feed_dict=feed_dict)
            #print "prediction is :   ",prediction

            print (" Training label error rate: %.3f" % (train_acc))
            # Re-applied on every display step; after the first time the
            # array already has this shape.
            testimgs = testimgs.reshape((ntest, nsteps, diminput))

            # Evaluate on the whole test set in one pass.
            feed_dict={x: testimgs, y: sparse_tuple_from([[value] for value in testlabels]), istate: np.zeros((ntest, 2*dimhidden)), seq_len: [nsteps for _ in xrange(len(testimgs))]}
            test_acc = sess.run(accr, feed_dict=feed_dict)
            print (" Test label error rate: %.3f" % (test_acc))
            # NOTE(review): this batch is only consumed by the disabled code
            # in the string literal below, yet the next_batch call itself
            # still runs -- confirm whether it is still wanted.
            p_xs, p_ys = mnist.train.next_batch(batch_size)
            p_xs = p_xs.reshape((batch_size,nsteps,diminput))
            # The string literal below is disabled debugging code (decode
            # and print a prediction); it is never executed.
            '''
            feed_dict ={x:p_xs,y:sparse_tuple_from([[value] for value in p_ys]), 
                       istate: np.zeros((ntest, 2*dimhidden)),
                       seq_len: [nsteps for _ in xrange(batch_size)]}
            prediction = sess.run(decoded[0],feed_dict=feed_dict)
            print "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX: ",prediction
            '''
print ("Optimization Finished.")

예제 #6

0

파일 보기

파일: Final_Project.py 프로젝트: komalbarge45/DeepLearning

def main():
    """Load the data, convert the labels, then run preprocessing and the CNN.

    One-hot labels from both splits are converted to scalar sentiment codes
    (1 positive, 0 neutral, -1 negative). The merged samples and scalar
    labels go to ``preprocessing_methods``; the resulting features, together
    with the original one-hot labels, go to ``convolution_model``.
    """
    # Data set container returned by load_data.py
    data = load_data.read_data_sets(one_hot=True)

    # Fetch the training data as a single batch
    # (training = 84420 and testing = 58128 samples)
    BATCH_SIZE = 84420
    train_x, train_labels = data.train.next_batch(BATCH_SIZE)

    test_x = data.test.data
    test_labels = data.test.labels

    # (one-hot column, scalar label) pairs, checked in this order per row:
    # column 0 -> positive (1), column 1 -> neutral (0),
    # column 2 -> negative (-1)
    column_to_label = ((0, 1), (1, 0), (2, -1))
    # Per-label tally accumulated over BOTH the train and the test split
    hits = {1: 0, 0: 0, -1: 0}

    def scalar_labels(one_hot_rows):
        # Convert one-hot rows to scalar labels, updating the shared tally
        converted = []
        for row in range(len(one_hot_rows)):
            for column, label in column_to_label:
                if one_hot_rows[row, column] == 1:
                    converted.append(label)
                    hits[label] = hits[label] + 1
        return converted

    # Label formation
    train_label = scalar_labels(train_labels)
    test_label = scalar_labels(test_labels)

    # Class counts over the combined data
    total_count = len(train_labels) + len(test_labels)
    pos_count = hits[1]
    neu_count = hits[0]
    neg_count = hits[-1]

    # Merge train and test samples, and likewise their scalar labels
    dataset = np.concatenate((np.array(train_x), np.array(test_x)))
    dataset_label = np.concatenate(
        (np.array(train_label), np.array(test_label)))

    # Starting timer
    start_time_lda = time.time()

    # Pre-processing steps such as data augmentation and feature extraction
    train_fe, test_fe = preprocessing_methods(dataset, dataset_label)

    # CNN model for classification
    convolution_model(train_fe, train_labels, test_fe, test_labels,
                      start_time_lda)

예제 #7

0

파일 보기

파일: lstm_mnist.py 프로젝트: liliang1991/machinelearning

#!/usr/bin/env python
# coding=utf-8
# Import packages
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import numpy as np
import load_data
#import matplotlib.pyplot as plt
#matplotlib inline
print("Packages imported")

# Read the data set through the project's load_data module; both path
# arguments are the same directory.
mnist = load_data.read_data_sets(
    "/home/moon/work/shendu/mnist_data",
    "/home/moon/work/shendu/mnist_data",
    one_hot=False, validation_size=5000)
trainimgs = mnist.train.images
trainlabels = mnist.train.labels
testimgs = mnist.test.images
testlabels = mnist.test.labels
# Sizes are taken straight from the array shapes.
ntrain = trainimgs.shape[0]
ntest = testimgs.shape[0]
dim = trainimgs.shape[1]
nclasses = trainlabels.shape[1]
print("MNIST loaded")

# Recurrent neural network
diminput, dimhidden, nsteps = 80, 128, 120
dimoutput = nclasses
# Randomly initialised projections: input -> hidden, hidden -> output.
weights = {}
weights['hidden'] = tf.Variable(tf.random_normal([diminput, dimhidden]))
weights['out'] = tf.Variable(tf.random_normal([dimhidden, dimoutput]))

예제 #8

0

파일 보기

#!/usr/bin/env python
# coding=utf-8
# Import packages
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import numpy as np
import load_data
#import matplotlib.pyplot as plt
#matplotlib inline
print("Packages imported")

# Read the data set through the project's load_data module, giving it the
# two data directories.
mnist = load_data.read_data_sets(
    "/home/a/workspace/ssd/DataSets/mnist",
    "/home/a/workspace/ssd/DataSets/mnist_test",
    one_hot=False, validation_size=5000)
trainimgs = mnist.train.images
trainlabels = mnist.train.labels
testimgs = mnist.test.images
testlabels = mnist.test.labels
# Sizes are taken straight from the array shapes.
ntrain = trainimgs.shape[0]
ntest = testimgs.shape[0]
dim = trainimgs.shape[1]
nclasses = trainlabels.shape[1]
print("MNIST loaded")

# Recurrent neural network
diminput, dimhidden, nsteps = 80, 128, 120
dimoutput = nclasses
# Randomly initialised projections: input -> hidden, hidden -> output.
weights = {}
weights['hidden'] = tf.Variable(tf.random_normal([diminput, dimhidden]))
weights['out'] = tf.Variable(tf.random_normal([dimhidden, dimoutput]))

예제 #9

0

파일 보기

def run_training():
    """Train the model, checkpointing and evaluating it periodically.

    Reads the data sets with ``ld.read_data_sets``, builds the inference,
    loss and training ops in a fresh default graph and runs
    ``FLAGS.max_steps`` optimisation steps.  Every fifth step, and on the
    final step, a checkpoint is saved; when ``FLAGS.skip_eval == 'NO'`` the
    training, validation and testing sets are also evaluated through
    ``do_eval`` and the results written below ``FLAGS.evaluation_dir``.

    The evaluation files and the TensorFlow session are released even when
    training fails part-way.

    Raises:
        ValueError: if ``FLAGS.model`` is neither ``'NN'`` nor ``'linear'``.
    """
    # Get the data and format it as DataSet tuples.
    data_sets = ld.read_data_sets(FLAGS.train_dir, FLAGS.train_file)
    feature_groups = data_sets.train.feature_groups
    # Set the feature size of the data sets on the model module.
    dm.FEATURE_SIZE = data_sets.train.feature_size

    evaluate = FLAGS.skip_eval == 'NO'
    eval_output = None
    eval_cindex_output = None
    sess = None
    try:
        if evaluate:
            # Output file for the evaluation results.
            if not os.path.exists(FLAGS.evaluation_dir):
                os.makedirs(FLAGS.evaluation_dir)
            eval_output = open(
                FLAGS.evaluation_dir + "/" + FLAGS.evaluation_file, "w")
            eval_output.write("#" + get_parameters() + "\n")
            eval_output.write("train_steps\teval_data_type\tloss\tcindex\n")

            # Optional output file for the per-sample cindex detail.
            if FLAGS.output_evaluation_cindex == 'YES':
                eval_cindex_output = open(
                    FLAGS.evaluation_dir + "/" + FLAGS.evaluation_cindex_file,
                    "w")
                eval_cindex_output.write(
                    "eval_type\tdates\tdates_predicted\tcensorship\tcindex\n")

        # do_eval is handed 0 in place of a file object when the cindex
        # detail output is switched off.
        if eval_cindex_output is not None:
            cindex_sink = eval_cindex_output
        else:
            cindex_sink = 0

        # Tell TensorFlow that the model will be built into the default Graph.
        with tf.Graph().as_default():
            # Generate input placeholders.
            feature_pl, at_risk_pl, date_pl, censor_pl = placeholder_inputs(
                FLAGS.batch_size)

            # Boolean placeholder telling the model whether it is being run
            # as part of the training optimisation.
            isTrain_pl = tf.placeholder(tf.bool, shape=())

            # Build the graph and get the prediction from the inference model.
            if FLAGS.model == 'NN':
                hidden_nodes = [int(x) for x in FLAGS.hidden_nodes.split(",")]
                inf_output_pl = dm.inference(feature_pl, hidden_nodes,
                                             FLAGS.activation,
                                             FLAGS.dropout_keep_rate,
                                             isTrain_pl)
            elif FLAGS.model == 'linear':
                # Original author's note: alpha should equal 1 for this model.
                inf_output_pl = dm.inference_linear(feature_pl)
            else:
                # Previously this fell through and surfaced later as a
                # NameError on inf_output_pl.
                raise ValueError(
                    "Unsupported FLAGS.model %r; expected 'NN' or 'linear'"
                    % (FLAGS.model,))

            # Add to the Graph the Ops for loss calculation.
            loss = dm.loss(inf_output_pl, censor_pl, feature_groups,
                           FLAGS.batch_size, FLAGS.alpha, FLAGS.scale,
                           FLAGS.delta, FLAGS.reg_type)

            # Add to the Graph the Ops that calculate and apply gradients.
            train_op = dm.training(loss, FLAGS.initial_learning_rate)

            # Add the variable initializer Op.
            init = tf.global_variables_initializer()

            # Create a saver for writing training checkpoints.
            saver = tf.train.Saver()

            # Create a session for running Ops on the Graph and initialise
            # the variables.
            sess = tf.Session()
            sess.run(init)

            # The checkpoint path prefix does not change between steps.
            checkpoint_file = os.path.join(FLAGS.saver_file_dir,
                                           FLAGS.saver_file_prefix)

            # Begin the training loop.
            for step in range(FLAGS.max_steps):
                start_time = time.time()

                # Fill the feed dict with the next training batch.
                feed_dict = fill_feed_dict(
                    data_sets.train.next_batch(FLAGS.batch_size), feature_pl,
                    at_risk_pl, date_pl, censor_pl)
                feed_dict[isTrain_pl] = True
                # Run one training step and update the parameters.
                _, loss_value, output = sess.run(
                    [train_op, loss, inf_output_pl], feed_dict=feed_dict)

                duration = time.time() - start_time

                # Save a checkpoint and evaluate the model periodically.
                if (step + 1) % 5 == 0 or (step + 1) == FLAGS.max_steps:
                    print('Step: %d , duration: %.3f sec'
                          % (step + 1, duration))
                    # Save the variables to disk.
                    saver.save(sess, checkpoint_file, global_step=step)
                    if evaluate:
                        # Evaluate against the training, validation and
                        # testing data, in that order.
                        for headline, data_tag, subset in (
                                ("Training data evaluation:",
                                 "training_data", data_sets.train),
                                ("Validation data evaluation:",
                                 "validation_data", data_sets.validation),
                                ("Testing data evaluation:",
                                 "testing_data", data_sets.test)):
                            print(headline)
                            do_eval(sess, cindex_sink, eval_output,
                                    data_tag, step + 1, loss, inf_output_pl,
                                    feature_pl, at_risk_pl, date_pl,
                                    censor_pl, subset, isTrain_pl, False)
    finally:
        # Release the session and the output files on success and on error.
        if sess is not None:
            sess.close()
        if eval_output is not None:
            eval_output.close()
        if eval_cindex_output is not None:
            eval_cindex_output.close()