Example #1
def main():
    x, cs, y = dataset_adult()
    for train_x, train_y, test_x, test_y in kfold(x, y, 10, 'mixed'):
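        # Build both candidate models for this fold; only the `ebrb` baseline is
        # trained and evaluated below (the `model` calls are commented out).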
        model = Model(train_x[0].shape[1], np.nanmin(train_x[0], 0),
                      np.nanmax(train_x[0], 0), cs, 'brb')
        ebrb = BaseM(32, train_x[0].shape[1], np.nanmin(train_x[0], 0),
                     np.nanmax(train_x[0], 0), cs, 2, 'con')
        training(ebrb, train_x, train_y)
        evaluating(ebrb, test_x, test_y, 'acc')
        # training(model, train_x, train_y)
        # evaluating(model, test_x, test_y, 'acc')
        exit(0)  # stop after evaluating the first fold
def train(data_dir, arch, hidden_units, output_size, dropout, lr, epochs, gpu,
          checkpoint):
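    """Load the data, build and train the model, and optionally save a checkpoint."""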

    print(
        'Dir: {},\t Arch: {},\t HiddenUnits: {},\t lr: {},\t Epochs: {},\t gpu: {}\n'
        .format(data_dir, arch, hidden_units, lr, epochs, gpu))

    print('Loading Images from Directory...')
    trainloader, validloader, testloader, class_to_idx = get_loaders(data_dir)
    print('Images Loaded.\n')

    print('Building the Model...')
    model, criterion, optimizer = build_model(arch, hidden_units, output_size,
                                              dropout, lr)
    print('Model Built.\n')

    print('Beginning the Training...')
    model, optimizer = training(model, trainloader, validloader, epochs, 20,
                                criterion, optimizer, gpu)
    print('Training Done.\n')

    if checkpoint:
        print('Saving the Checkpoint...')
        save_checkpoint(checkpoint, model, optimizer, arch,
                        model.classifier[0].in_features, output_size,
                        hidden_units, dropout, class_to_idx, epochs, lr)
        print('Done.')
Example #3
def main(args_parser):
    #Dataset
    parser = args_parser
    args = parser.parse_args()

    train_image_data, train_label_data, train_filename, valid_image_data, valid_label_data, valid_filename, unique_classes = get_data(
    )
    #tf.reset_default_graph()
    DATASET_PATH = args.datasetPath
    LEARNING_RATE_1 = args.learningRate
    EPOCHS = args.epochs
    BATCH_SIZE = args.batchSize
    NUM_CLASSES = len(unique_classes)
    Z_SCORE = args.zScore
    WEIGHT_DECAY_1 = args.weightDecay

    print("Current Setup:-")
    print(
        "Starting Learning Rate: {}, Epochs: {}, Batch Size: {}, Confidence Interval Z-Score {}, Number of classes: {}, Starting Weight Decay: {}"
        .format(LEARNING_RATE_1, EPOCHS, BATCH_SIZE, Z_SCORE, NUM_CLASSES,
                WEIGHT_DECAY_1))

    #Placeholders
    learning_rate = tf.placeholder(tf.float32, shape=[], name='learning_rate')
    weight_decay = tf.placeholder(tf.float32, shape=[], name="weight_decay")

    #Dataset
    training_dataset = tf.data.Dataset.from_generator(
        lambda: itertools.zip_longest(train_image_data, train_label_data,
                                      train_filename),
        # the generator yields (image, label, filename) triples, so three output
        # types are required; the filename is assumed to be a string tensor
        output_types=(tf.float32, tf.float32, tf.string),
        output_shapes=(tf.TensorShape([None, None, 3]), tf.TensorShape([None]),
                       tf.TensorShape([None])))

    training_dataset = training_dataset.repeat(EPOCHS).batch(
        BATCH_SIZE).prefetch(1)
    train_iterator = training_dataset.make_initializable_iterator()
    train_features, train_labels, train_filename = train_iterator.get_next()

    valid_dataset = tf.data.Dataset.from_generator(
        lambda: itertools.zip_longest(valid_image_data, valid_label_data,
                                      valid_filename),
        # same as above: three output types, filename assumed to be a string
        output_types=(tf.float32, tf.float32, tf.string),
        output_shapes=(tf.TensorShape([None, None, 3]), tf.TensorShape([None]),
                       tf.TensorShape([None])))

    valid_dataset = valid_dataset.repeat(EPOCHS).batch(BATCH_SIZE).prefetch(1)
    valid_iterator = valid_dataset.make_initializable_iterator()
    valid_features, valid_labels, valid_filename = valid_iterator.get_next()

    #Model
    _, train_op, train_cross_entropy, train_conf_matrix_op, train_accuracy = initiate_vgg_model(
        train_features,
        train_labels,
        train_filename,
        NUM_CLASSES,
        weight_decay,
        learning_rate,
        handle="training",
        reuse_model=None)
    _, _, valid_cross_entropy, valid_conf_matrix_op, valid_accuracy = initiate_vgg_model(
        valid_features,
        valid_labels,
        valid_filename,
        NUM_CLASSES,
        weight_decay,
        learning_rate,
        handle="validation",
        reuse_model=True)

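    # Saver used to checkpoint the model whenever the training accuracy improves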
    saver = tf.train.Saver()

    if not os.path.exists(os.path.join("./short_dl_research_train/")):
        os.mkdir(os.path.join("./short_dl_research_train/"))

    with tf.Session() as sess:
        with np.printoptions(threshold=np.inf):
            train_writer = tf.summary.FileWriter(
                "./short_tensorboard_training_logs/")
            valid_writer = tf.summary.FileWriter(
                "./short_tensorboard_validation_logs/")
            train_writer.add_graph(sess.graph)
            valid_writer.add_graph(sess.graph)
            train_highest_acc = 0
            valid_highest_acc = 0
            sess.run([
                tf.global_variables_initializer(),
                tf.local_variables_initializer()
            ])

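            # Each epoch runs the training iterator to exhaustion, then the
            # validation iterator; OutOfRangeError marks the end of each pass.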
            for epoch in range(EPOCHS):
                print("Current Epoch: {}/{}".format(epoch, EPOCHS))
                i = 0
                try:
                    sess.run(train_iterator.initializer)
                    while True:
                        print("Current Training Iteration : {}/{}".format(
                            i, floor(int(157252) / BATCH_SIZE)))
                        train_acc, _, _, train_ce = util.training(
                            BATCH_SIZE, NUM_CLASSES, learning_rate,
                            weight_decay, sess, train_op, train_conf_matrix_op,
                            LEARNING_RATE_1, WEIGHT_DECAY_1,
                            train_cross_entropy, train_accuracy)
                        train_value1, train_value2 = util.confidence_interval(
                            train_acc, Z_SCORE, BATCH_SIZE)
                        print("Training Accuracy : {}".format(train_acc))
                        print("Training Loss (Cross Entropy) : {}".format(
                            train_ce))
                        print("Training Confidence Interval: [{} , {}]".format(
                            train_value2, train_value1))
                        if train_highest_acc <= train_acc:
                            train_highest_acc = train_acc
                            print(
                                "Highest Training Accuracy Reached: {}".format(
                                    train_highest_acc))
                            # Save the model whenever the training accuracy improves
                            saver.save(
                                sess,
                                os.path.join("./short_dl_research_train/",
                                             "model.ckpt"))
                            print(
                                "Latest model saved and TensorBoard logs updated"
                            )
                        train_writer.add_summary(
                            tf.summary.merge_all().eval(),
                            epoch * (floor(int(157252) / BATCH_SIZE)) + i)
                        i = i + 1
                except tf.errors.OutOfRangeError:
                    print("End of the training dataset, proceed to validation")
                    pass

                j = 0
                try:
                    sess.run(valid_iterator.initializer)
                    while True:
                        print("Current Validation Iteration : {}/{}".format(
                            j, floor(int(19657) / BATCH_SIZE)))
                        valid_acc, _, valid_ce = util.validation(
                            BATCH_SIZE, NUM_CLASSES, learning_rate,
                            weight_decay, sess, valid_conf_matrix_op,
                            LEARNING_RATE_1, WEIGHT_DECAY_1,
                            valid_cross_entropy, valid_accuracy)
                        valid_value1, valid_value2 = util.confidence_interval(
                            valid_acc, Z_SCORE, BATCH_SIZE)
                        print("Validation Accuracy : {}".format(valid_acc))
                        print("validation Loss (Cross Entropy) : {}".format(
                            valid_ce))
                        print(
                            "Validation Confidence Interval: [{} , {}]".format(
                                valid_value2, valid_value1))
                        if valid_highest_acc <= valid_acc:
                            valid_highest_acc = valid_acc
                            print("Highest Validation Accuracy Reached: {}".
                                  format(valid_highest_acc))
                        valid_writer.add_summary(
                            tf.summary.merge_all().eval(),
                            epoch * (floor(int(19657) / BATCH_SIZE)) + j)
                        j = j + 1
                except tf.errors.OutOfRangeError:
                    print("End of validation dataset, go to the next epoch")
                    pass
Example #4
def main(cli_args):
    parser = argparse.ArgumentParser(
        description="CSCE 496 HW 2, Classify Cifar data")
    parser.add_argument('--input_dir',
                        type=str,
                        default='/work/cse496dl/shared/homework/02',
                        help='Numpy datafile input')
    parser.add_argument(
        '--model_dir',
        type=str,
        default='./homework_2/',
        help='directory where model graph and weights are saved')
    parser.add_argument('--epoch',
                        type=int,
                        default=100,
                        help="Epoch : number of iterations for the model")
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help="Batch Size")
    parser.add_argument('--model',
                        type=int,
                        help=" '1' for basic model, '2' for best model")
    parser.add_argument(
        '--stopCount',
        type=int,
        default=100,
        help="Number of times for dropping accuracy before early stopping")
    args_input = parser.parse_args(cli_args)

    if args_input.input_dir:
        input_dir = args_input.input_dir
    else:
        raise ValueError("Provide a valid input data path")

    if args_input.model_dir:
        model_dir = args_input.model_dir
    else:
        raise ValueError("Provide a valid model data path")

    if args_input.epoch:
        epochs = args_input.epoch
    else:
        raise ValueError("Epoch value cannot be null and has to be an integer")

    if args_input.batch_size:
        batch_size = args_input.batch_size
    else:
        raise ValueError(
            "Batch Size value cannot be null and has to be an integer")

    if args_input.model:
        model = args_input.model
    else:
        raise ValueError("Model selection must not be empty")

    if args_input.stopCount:
        stop_counter = args_input.stopCount
    else:
        raise ValueError("StopCount have to be an int")

    input_dir = '/work/cse496dl/shared/homework/02'  # note: overrides the --input_dir argument
    #Make output model dir
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    #Placeholders
    x = tf.placeholder(tf.float32, [None, 32, 32, 3], name='input_placeholder')
    y = tf.placeholder(tf.float32, [None, 100], name='labels')

    #Specify Model
    if (str(model) == '1'):
        train_images, train_labels, test_images, test_labels, val_images, val_labels = util.load_data(
            "")
        _, outputLayer = initiate_basic_model(x)
        #Run Training with early stopping and save output
        counter = stop_counter
        prev_winner = 0
        curr_winner = 0
        optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
        cross_entropy = util.cross_entropy_op(y, outputLayer)
        global_step_tensor = util.global_step_tensor('global_step_tensor')
        train_op = util.train_op_basic(cross_entropy, global_step_tensor,
                                       optimizer)
        conf_matrix = util.confusion_matrix_op(y, outputLayer, 100)
        saver = tf.train.Saver()
        with tf.Session() as session:
            session.run(tf.global_variables_initializer())
            counter = stop_counter
            for epoch in range(epochs):
                if counter > 0:
                    print("Epoch : " + str(epoch))
                    util.training(batch_size, x, y, train_images, train_labels,
                                  session, train_op, conf_matrix, 100)
                    accuracy = util.validation(batch_size, x, y, val_images,
                                               val_labels, session,
                                               cross_entropy, conf_matrix, 100)
                    if epoch == 0:
                        prev_winner = accuracy
                        print("Saving.......")
                        saver.save(session,
                                   os.path.join("./homework_2/", "homework_2"))
                    else:
                        curr_winner = accuracy
                        if (curr_winner > prev_winner) and (counter > 0):
                            prev_winner = curr_winner
                            print("Saving.......")
                            saver.save(
                                session,
                                os.path.join("./homework_2/", "homework_2"))
                        else:
                            counter -= 1

                    test_accuracy = util.test(batch_size, x, y, test_images,
                                              test_labels, session,
                                              cross_entropy, conf_matrix, 100)
                    #Calculate the confidence interval
                    value1, value2 = util.confidence_interval(
                        test_accuracy, 1.96, test_images.shape[0])
                    print("Confidence Interval : " + str(value1) + " , " +
                          str(value2))
                else:
                    break

    elif (str(model) == '2'):
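        # Weight of the L1 sparsity penalty applied to the autoencoder's code layer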
        sparsity_weight = 5e-3
        #Load the data and reshape it
        train_data = np.load(
            os.path.join(os.path.join(input_dir, 'imagenet_images.npy')))
        train_images, train_labels, test_images, test_labels, val_images, val_labels = util.load_data(
            "")
        #train_data = np.reshape(train_data, [-1,32,32,1])
        #Add noise to the data
        noise_level = 0.2
        x_noise = x + noise_level * tf.random_normal(tf.shape(x))
        code, outputs = initiate_autoencoder(x_noise, 100)
        #Optimizer for Autoencoder
        sparsity_loss = tf.norm(code, ord=1, axis=1)
        reconstruction_loss = tf.reduce_mean(tf.square(outputs - x))  # Mean Square Error
        total_loss = reconstruction_loss + sparsity_weight * sparsity_loss
        optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
        train_op = optimizer.minimize(total_loss)
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            util.autoencoder_training(x, code, epochs, batch_size, train_data,
                                      sess, train_op)
            saver.save(sess, os.path.join("./homework_2/", "homework_2"))
        print("Done : " + str(code))

        _, outputLayer = initiate_dense_model(code)

        #Run Training with early stopping and save output
        counter = stop_counter
        prev_winner = 0
        curr_winner = 0
        optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
        cross_entropy = util.cross_entropy_op(y, outputLayer)
        global_step_tensor = util.global_step_tensor('global_step_tensor')
        #train_op = util.train_op_encoder(cross_entropy, global_step_tensor, optimizer, var_list=tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "code_layer"))
        train_op = util.train_op_basic(cross_entropy, global_step_tensor,
                                       optimizer)
        conf_matrix = util.confusion_matrix_op(y, outputLayer, 100)
        with tf.Session() as session:
            session.run(tf.global_variables_initializer())
            if os.path.isfile(os.path.join("./homework_2/", "homework_2")):
                saver = tf.train.import_meta_graph("homework_2.meta")
                saver.restore(session, "./homework_2/homework_2")
            code_encode = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                            "code_layer")
            session.run(
                tf.variables_initializer(code_encode,
                                         name="init_encoded_layer"))
            tf.stop_gradient(
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                  "init_encoded_layer"))
            counter = stop_counter
            for epoch in range(epochs):
                if counter > 0:
                    print("Epoch : " + str(epoch))
                    util.training(batch_size, x, y, train_images, train_labels,
                                  session, train_op, conf_matrix, 100)
                    accuracy = util.validation(batch_size, x, y, val_images,
                                               val_labels, session,
                                               cross_entropy, conf_matrix, 100)
                    if epoch == 0:
                        prev_winner = accuracy
                        print("Saving.......")
                        saver.save(session,
                                   os.path.join("./homework_2/", "homework_2"))
                    else:
                        curr_winner = accuracy
                        if (curr_winner > prev_winner) and (counter > 0):
                            prev_winner = curr_winner
                            print("Saving.......")
                            saver.save(
                                session,
                                os.path.join("./homework_2/", "homework_2"))
                        else:
                            print("Validation Loss : " +
                                  str(curr_winner - prev_winner))
                            counter -= 1

                    test_accuracy = util.test(batch_size, x, y, test_images,
                                              test_labels, session,
                                              cross_entropy, conf_matrix, 100)
                    #Calculate the confidence interval
                    value1, value2 = util.confidence_interval(
                        test_accuracy, 1.96, test_images.shape[0])
                    print("Confidence Interval : " + str(value1) + " , " +
                          str(value2))
                else:
                    break
Example #5
def main(args_parser):
    parser = args_parser
    args   = parser.parse_args()

    #tf.reset_default_graph()
    DATASET_PATH = args.datasetPath
    LEARNING_RATE_1 = 0.01  # (args.learningRate)
    EPOCHS = 4  # int(args.epochs)
    BATCH_SIZE = 32  # int(args.batchSize)
    NUM_CLASSES = 5  # int(args.numClasses)
    Z_SCORE = 1.96  # (args.zScore)
    WEIGHT_DECAY_1 = 0.0005  # (args.weightDecay)

    print("Current Setup:-")
    print("Starting Learning Rate: {}, Epochs: {}, Batch Size: {}, Confidence Interval Z-Score {}, Number of classes: {}, Starting Weight Decay: {}".format(LEARNING_RATE_1, EPOCHS, BATCH_SIZE, Z_SCORE, NUM_CLASSES, WEIGHT_DECAY_1))

    #Placeholders
    learning_rate = tf.placeholder(tf.float32, shape=[], name='learning_rate')
    weight_decay = tf.placeholder(tf.float32, shape=[], name="weight_decay")

    #Dataset
    train_next_element, train_iterator = util.train_input_fn(DATASET_PATH, BATCH_SIZE, EPOCHS)
    valid_next_element, valid_iterator = util.valid_input_fn(DATASET_PATH, BATCH_SIZE, EPOCHS)

    #dataset_len = 157252
    #Model
    _, train_op, train_cross_entropy, train_conf_matrix_op, train_accuracy = initiate_vgg_model(train_next_element[0], train_next_element[1], train_next_element[2], NUM_CLASSES, weight_decay, learning_rate, handle="training", reuse_model=None)
    _, _, valid_cross_entropy, valid_conf_matrix_op, valid_accuracy = initiate_vgg_model(valid_next_element[0], valid_next_element[1], valid_next_element[2], NUM_CLASSES, weight_decay, learning_rate, handle="validation", reuse_model=True)
    #tf.summary.scalar("training_confusion_matrix", tf.reshape(tf.cast(conf_matrix_op, tf.float32),[1, NUM_CLASSES, NUM_CLASSES, 1]))
    saver = tf.train.Saver()

    if not os.path.exists(os.path.join("./short_dl_research_train/")):
        os.mkdir(os.path.join("./short_dl_research_train/"))
        
    with tf.Session() as sess:
        with np.printoptions(threshold=np.inf):
            train_writer = tf.summary.FileWriter("./short_tensorboard_training_logs/")
            valid_writer = tf.summary.FileWriter("./short_tensorboard_validation_logs/")
            train_writer.add_graph(sess.graph)
            valid_writer.add_graph(sess.graph)
            train_highest_acc = 0
            valid_highest_acc = 0
            sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

            for epoch in range(EPOCHS):
                sess.run([train_iterator.initializer, valid_iterator.initializer])
                print("Train iterator and valid iterator are initialized")
                print("Current Epoch: {}/{}".format(epoch, EPOCHS))
                i = 0
                try:
                    while True:
                        print("Current Training Iteration : {}/{}".format(i, floor(int(157252)/BATCH_SIZE)))
                        train_acc, _, _, train_ce = util.training(BATCH_SIZE, NUM_CLASSES,learning_rate, weight_decay, sess, train_op, train_conf_matrix_op, LEARNING_RATE_1, WEIGHT_DECAY_1, train_cross_entropy, train_accuracy)
                        train_value1, train_value2 = util.confidence_interval(train_acc, Z_SCORE, BATCH_SIZE)
                        print("Training Accuracy : {}".format(train_acc))
                        print("Training Loss (Cross Entropy) : {}".format(train_ce))
                        print("Training Confidence Interval: [{} , {}]".format(train_value2, train_value1))
                        if train_highest_acc <= train_acc:
                            train_highest_acc = train_acc
                            print("Highest Training Accuracy Reached: {}".format(train_highest_acc))
                            # Save the model whenever the training accuracy improves
                            saver.save(sess, os.path.join("./short_dl_research_train/", "model.ckpt"))
                            print("Latest model saved and TensorBoard logs updated")
                        train_writer.add_summary(tf.summary.merge_all().eval(), epoch * (floor(int(157252)/BATCH_SIZE)) + i)
                        i = i + 1
                except tf.errors.OutOfRangeError:
                    print("End of the training dataset, proceed to validation")
                    pass

                j = 0
                try:
                    while True:
                        print("Current Validation Iteration : {}/{}".format(j, floor(int(19657)/BATCH_SIZE)))
                        valid_acc, _, valid_ce = util.validation(BATCH_SIZE, NUM_CLASSES,learning_rate, weight_decay, sess, valid_conf_matrix_op, LEARNING_RATE_1, WEIGHT_DECAY_1, valid_cross_entropy, valid_accuracy)
                        valid_value1, valid_value2 = util.confidence_interval(valid_acc, Z_SCORE, BATCH_SIZE)
                        print("Validation Accuracy : {}".format(valid_acc))
                        print("validation Loss (Cross Entropy) : {}".format(valid_ce))
                        print("Validation Confidence Interval: [{} , {}]".format(valid_value2, valid_value1))
                        if valid_highest_acc <= valid_acc:
                            valid_highest_acc = valid_acc
                            print("Highest Validation Accuracy Reached: {}".format(valid_highest_acc))
                        valid_writer.add_summary(tf.summary.merge_all().eval(), epoch * (floor(int(19657)/BATCH_SIZE)) + j)
                        j = j + 1
                except tf.errors.OutOfRangeError:
                    print("End of validation dataset, go to the next epoch")
                    pass
def main():
    #Constants
    DATASET_PATH = os.path.join(".")
    LEARNING_RATE_1 = 0.0001
    EPOCHS = 2
    BATCH_SIZE = 32
    NUM_CLASSES = 48
    Z_SCORE = 1.96
    WEIGHT_DECAY_1 = 0.0005

    print("Current Setup:-")
    print(
        "Starting Learning Rate: {}, Epochs: {}, Batch Size: {}, Confidence Interval Z-Score {}, Number of classes: {}, Starting Weight Decay: {}"
        .format(LEARNING_RATE_1, EPOCHS, BATCH_SIZE, Z_SCORE, NUM_CLASSES,
                WEIGHT_DECAY_1))

    #Get the number of GPUs
    NUM_GPUS = util.get_available_gpus()

    print("Number of GPUs available : {}".format(NUM_GPUS))
    with tf.device('/cpu:0'):
        tower_grads = []
        reuse_vars = False
        dataset_len = 1207350

        #Placeholders
        learning_rate = tf.placeholder(tf.float32,
                                       shape=[],
                                       name='learning_rate')
        weight_decay = tf.placeholder(tf.float32,
                                      shape=[],
                                      name="weight_decay")

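        # Build one model replica per available GPU; each replica appends its
        # gradients to tower_grads for later averaging.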
        for i in range(NUM_GPUS):
            with tf.device(
                    util.assign_to_device('/gpu:{}'.format(i),
                                          ps_device='/cpu:0')):

                #Need to split data between GPUs
                train_features, train_labels, train_filenames = util.train_input_fn(
                    DATASET_PATH, BATCH_SIZE, EPOCHS)
                print("At GPU {}, Train Features : {}".format(
                    i, train_features))

                #Model
                _, train_op, tower_grads, train_cross_entropy, train_conf_matrix_op, train_accuracy, reuse_vars = initiate_vgg_model(
                    train_features,
                    train_labels,
                    train_filenames,
                    NUM_CLASSES,
                    weight_decay,
                    learning_rate,
                    reuse=reuse_vars,
                    tower_grads=tower_grads,
                    gpu_num=i,
                    handle="training")
                #tf.summary.scalar("training_confusion_matrix", tf.reshape(tf.cast(conf_matrix_op, tf.float32),[1, NUM_CLASSES, NUM_CLASSES, 1]))

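        # Average the per-GPU gradients and apply them as a single update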
        tower_grads = util.average_gradients(tower_grads)
        train_op = train_op.apply_gradients(tower_grads)

        saver = tf.train.Saver()

        if not os.path.exists(os.path.join("./multi_dl_research_train/")):
            os.mkdir(os.path.join("./multi_dl_research_train/"))

        with tf.Session() as sess:
            with np.printoptions(threshold=np.inf):
                writer = tf.summary.FileWriter("./multi_tensorboard_logs/")
                writer.add_graph(sess.graph)
                merged_summary = tf.summary.merge_all()
                train_highest_acc = 0
                sess.run([
                    tf.global_variables_initializer(),
                    tf.local_variables_initializer()
                ])

                for epoch in range(EPOCHS):
                    if epoch == 18:
                        LEARNING_RATE_1 = 0.00005
                        print("Learning Rate changed to {} at epoch {}".format(
                            LEARNING_RATE_1, epoch))
                    elif epoch == 29:
                        LEARNING_RATE_1 = 0.00001
                        WEIGHT_DECAY_1 = 0.0
                        print("Learning Rate changed to {} at epoch {}".format(
                            LEARNING_RATE_1, epoch))
                        print("Weight Decay changed to {} at epoch {}".format(
                            WEIGHT_DECAY_1, epoch))
                    elif epoch == 42:
                        LEARNING_RATE_1 = 0.000005
                        print("Learning Rate changed to {} at epoch {}".format(
                            LEARNING_RATE_1, epoch))
                    elif epoch == 51:
                        LEARNING_RATE_1 = 0.000001
                        print("Learning Rate changed to {} at epoch {}".format(
                            LEARNING_RATE_1, epoch))

                    print("Current Epoch: {}".format(epoch))
                    for i in range(2):
                        print("Current Training Iteration : {}/{}".format(
                            i, 10))
                        train_acc, _, _, train_ce, train_summary = util.training(
                            BATCH_SIZE, NUM_CLASSES, learning_rate,
                            weight_decay, sess, train_op, train_conf_matrix_op,
                            LEARNING_RATE_1, WEIGHT_DECAY_1,
                            train_cross_entropy, merged_summary,
                            train_accuracy)
                        train_value1, train_value2 = util.confidence_interval(
                            train_acc, Z_SCORE, 32)
                        print("Training Accuracy : {}".format(train_acc))
                        print("Training Loss (Cross Entropy) : {}".format(
                            train_ce))
                        print("Training Confidence Interval: [{} , {}]".format(
                            train_value2, train_value1))
                        if train_highest_acc <= train_acc:
                            train_highest_acc = train_acc
                            print(
                                "Highest Training Accuracy Reached: {}".format(
                                    train_highest_acc))
                            # Save the model whenever the training accuracy improves
                            saver.save(
                                sess,
                                os.path.join("./multi_dl_research_train/",
                                             "model.ckpt"))
                            print(
                                "Latest model saved and TensorBoard logs updated"
                            )
                        writer.add_summary(
                            train_summary,
                            epoch * int((dataset_len * 0.8) / BATCH_SIZE) + i)
Example #7
def main(cli_args):
    parser = argparse.ArgumentParser(
        description="CSCE 496 HW 1, Classify Fashion MNIST data")
    parser.add_argument('--input_dir',
                        type=str,
                        default='/work/cse496dl/shared/homework/01',
                        help='Numpy datafile input')
    parser.add_argument(
        '--model_dir',
        type=str,
        default='./homework_1/',
        help='directory where model graph and weights are saved')
    parser.add_argument('--epoch',
                        type=int,
                        default=100,
                        help="Epoch : number of iterations for the model")
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help="Batch Size")
    parser.add_argument('--model',
                        type=int,
                        help=" '1' for basic model, '2' for best model")
    parser.add_argument(
        '--stopCount',
        type=int,
        default=100,
        help="Number of times for dropping accuracy before early stopping")
    args_input = parser.parse_args(cli_args)

    if args_input.input_dir:
        input_dir = args_input.input_dir
    else:
        raise ValueError("Provide a valid input data path")

    if args_input.model_dir:
        model_dir = args_input.model_dir
    else:
        raise ValueError("Provide a valid model data path")

    if args_input.epoch:
        epochs = args_input.epoch
    else:
        raise ValueError("Epoch value cannot be null and has to be an integer")

    if args_input.batch_size:
        batch_size = args_input.batch_size
    else:
        raise ValueError(
            "Batch Size value cannot be null and has to be an integer")

    if args_input.model:
        model = args_input.model
    else:
        raise ValueError("Model selection must not be empty")

    if args_input.stopCount:
        stop_counter = args_input.stopCount
    else:
        raise ValueError("StopCount have to be an int")

    input_dir = '/work/cse496dl/shared/homework/01'  # note: overrides the --input_dir argument
    #Make output model dir
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    #Load Data
    train_images, train_labels, test_images, test_labels, val_images, val_labels = util.load_data(
        input_dir)
    x = tf.placeholder(tf.float32, [None, 784], name='input_placeholder')
    y = tf.placeholder(tf.float32, [None, 10], name='labels')

    #Specify Model
    if (str(model) == '1'):
        _, outputLayer = initiate_basic_model(x)
    elif (str(model) == '2'):
        _, outputLayer = initiate_better_model(x)

    #Run Training with early stopping and save output
    counter = stop_counter
    prev_winner = 0
    curr_winner = 0
    optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
    cross_entropy = util.cross_entropy_op(y, outputLayer)
    global_step_tensor = util.global_step_tensor('global_step_tensor')
    train_op = util.train_op(cross_entropy, global_step_tensor, optimizer)
    conf_matrix = util.confusion_matrix_op(y, outputLayer, 10)
    saver = tf.train.Saver()
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
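        # 10-fold cross-validation: each fold is trained with early stopping
        # based on validation accuracy, then evaluated on its test split.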
        for i in range(10):
            print("KFold : " + str(i))
            counter = stop_counter
            for epoch in range(epochs):
                if counter > 0:
                    print("Epoch : " + str(epoch))
                    util.training(batch_size, x, y, train_images[i],
                                  train_labels[i], session, train_op,
                                  conf_matrix, 10)
                    accuracy = util.validation(batch_size, x, y, val_images[i],
                                               val_labels[i], session,
                                               cross_entropy, conf_matrix, 10)
                    if epoch == 0:
                        prev_winner = accuracy
                    else:
                        curr_winner = accuracy
                        if (curr_winner > prev_winner) and (counter > 0):
                            prev_winner = curr_winner
                        else:
                            counter -= 1

                    test_accuracy = util.test(batch_size, x, y, test_images[i],
                                              test_labels[i], session,
                                              cross_entropy, conf_matrix, 10)
                    #Calculate the confidence interval
                    value1, value2 = util.confidence_interval(
                        test_accuracy, 1.96, test_images[i].shape[0])
                    print("Confidence Interval : " + str(value1) + " , " +
                          str(value2))
                else:
                    break
            print("Saving.......")
            saver.save(session, os.path.join("./homework_1/", "homework_1"))
Example #8
def run_training():
    '''Train the neural network.'''
    # sanity check
    assert (FLAGS.input_data_type == 'float' or FLAGS.input_data_type == 'int')
    assert (FLAGS.output_data_type == 'float'
            or FLAGS.output_data_type == 'int')
    # import the dataset
    data_sets = dataset.Datasets(FLAGS.data_dir, FLAGS.separate_file,
                                 FLAGS.input_data_type, FLAGS.output_data_type)
    #for hotspot training
    '''
    data_sets = dataset.Datasets(FLAGS.data_dir,
            FLAGS.separate_file,
            FLAGS.input_data_type, FLAGS.output_data_type,
            FLAGS.tile_size, FLAGS.num_maps)
    '''

    with tf.Graph().as_default():
        # placeholder
        input_pl, golden_pl = util.generate_placeholder(
            data_sets.num_in_neuron, data_sets.num_out_neuron,
            FLAGS.batch_size, FLAGS.input_data_type, FLAGS.output_data_type)
        # build graph
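        # The graph has zero, one, or two fast_sigmoid hidden layers, depending
        # on FLAGS.hidden1 and FLAGS.hidden2, plus an output layer with no activation.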
        if FLAGS.hidden1 == 0:
            assert (FLAGS.hidden2 == 0)
            outputs = util.layer('output_layer', input_pl,
                                 data_sets.num_in_neuron,
                                 data_sets.num_out_neuron, None)
        else:
            hidden1 = util.layer('hidden1', input_pl, data_sets.num_in_neuron,
                                 FLAGS.hidden1, util.fast_sigmoid)
            if FLAGS.hidden2 == 0:
                outputs = util.layer('output_layer', hidden1, FLAGS.hidden1,
                                     data_sets.num_out_neuron, None)
            else:
                hidden2 = util.layer('hidden2', hidden1, FLAGS.hidden1,
                                     FLAGS.hidden2, util.fast_sigmoid)
                outputs = util.layer('output_layer', hidden2, FLAGS.hidden2,
                                     data_sets.num_out_neuron, None)

        # loss
        #loss = bm.loss(outputs, golden_pl)
        loss = util.loss(outputs, golden_pl, FLAGS.benchmark)

        # train
        #train_op = bm.training(loss, FLAGS.learning_rate)
        train_op = util.training(loss, FLAGS.learning_rate)

        # accumulated error for one batch of data
        error = util.error(outputs, golden_pl, FLAGS.benchmark)

        # summary - not necessary (note: this snippet uses the pre-1.0 TensorFlow API)
        summary = tf.merge_all_summaries()

        # init
        init = tf.initialize_all_variables()

        # sess
        sess = tf.Session()

        # summary writer - not necessary
        summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph)

        # everything built, run init
        sess.run(init)

        # start training
        #_, max_steps = data_sets.train.max_steps(FLAGS.batch_size)
        for step in xrange(FLAGS.max_steps):
            feed_dict = util.fill_feed_dict(data_sets.train, input_pl,
                                            golden_pl, FLAGS.batch_size)
            sess.run(train_op, feed_dict=feed_dict)

            # print the loss every 100 steps
            # write the summary
            # evaluate the model
            if not step % 100:
                print('step %d: loss = %.2f' %
                      (step, sess.run(loss, feed_dict=feed_dict)))

                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()
                '''
                print('training data evaluation')
                util.do_eval(sess, error,
                        input_pl, golden_pl,
                        FLAGS.batch_size, data_sets.train)
                '''
                print('validation data evaluation')
                util.do_eval(sess, error, input_pl, golden_pl,
                             FLAGS.batch_size, data_sets.validate)

        # final accuracy
        print('test data evaluation')
        util.do_eval(sess, error, input_pl, golden_pl, FLAGS.batch_size,
                     data_sets.test)

        # filename for saving
        savefile = str(data_sets.num_in_neuron) + "_" + str(
            FLAGS.hidden1) + "_" + str(FLAGS.hidden2) + "_" + str(
                data_sets.num_out_neuron) + ".txt"

        # save weights and biases
        util.save_config(sess, NUM_LAYERS, FLAGS.config_dir, savefile)

        # save trained output
        #util.save_output(sess, data_sets.train, outputs, FLAGS.data_dir)
        #need to fetch original input data
        output_save = sess.run(outputs,
                               feed_dict={input_pl: data_sets.input_data})
        np.savetxt(os.path.join(FLAGS.data_dir, "train_result", savefile),
                   output_save,
                   delimiter=" ")