def main(args):
    """
    Entry point of the ISM DNN classifier.

    ``args.mode`` selects what happens inside a single TensorFlow session:

    * ``0`` -- train the classifier, printing loss/accuracy every 1000 steps
      and writing a checkpoint to ``args.model_path`` every 10000 steps.
    * ``1`` -- restore each checkpoint found in ``args.model_path`` and print
      its validation accuracy followed by the ``stop_loss_gate`` output.
    * ``2`` -- placeholder for test mode; does nothing.
    """

    with tf.Session() as sess:

        if args.mode == 0:
            # ---- training mode ----
            dataset = preprocessing.ism_data(
                ism_path=args.ism_training_path,
                interval=args.interval,
                Rc=args.Rc,
                batch_size=args.batch_size,
                mode=0)

            # the classifier is sized from what the data set reports
            feature_count = dataset.N_feature
            sample_count = dataset.num_data
            classifier = ISMClassifier(
                sess, feature_count, args.learning_rate_base, sample_count,
                args.batch_size)
            classifier.initialize()

            # checkpoint directory: an existing one is handed to del_file,
            # a missing one is created
            if os.path.exists(args.model_path):
                del_file(args.model_path)
            else:
                os.mkdir(args.model_path)

            for epoch in range(args.num_epoch):
                for step in range(args.train_steps):
                    features, targets = dataset.next_batch()
                    loss, accuracy, _ = classifier.train(features, targets)

                    # progress report
                    if step % 1000 == 0:
                        message = 'epoch-{},step-{}: loss {}, accuracy {}'.format(
                            ep := epoch, step, loss, accuracy) if False else \
                            'epoch-{},step-{}: loss {}, accuracy {}'.format(
                                epoch, step, loss, accuracy)
                        print(message)

                    # periodic checkpoint, tagged with the in-epoch step
                    if (step + 1) % 10000 == 0:
                        ckpt_prefix = os.path.join(args.model_path,
                                                   'model.ckpt')
                        classifier.saver.save(sess, ckpt_prefix,
                                              global_step=step)

        elif args.mode == 1:
            # ---- validation mode ----
            dataset = preprocessing.ism_data(
                ism_path=args.ism_validation_path,
                interval=args.interval,
                Rc=args.Rc,
                batch_size=args.batch_size,
                mode=1)

            feature_count = dataset.N_feature
            sample_count = dataset.num_data
            classifier = ISMClassifier(
                sess, feature_count, args.learning_rate_base, sample_count,
                args.batch_size)

            # every "model.ckpt-<step>.data*" file identifies one checkpoint
            ckpt_pattern = re.compile(r'model\.ckpt\-(\d*)\.data.*')
            for file_name in os.listdir(args.model_path):
                found = ckpt_pattern.match(file_name)
                if found is None:
                    continue

                step_tag = found.group(1)
                ckpt_path = os.path.join(args.model_path,
                                         'model.ckpt-' + step_tag)
                classifier.saver.restore(sess, ckpt_path)

                input_all, label_all = dataset.validation_set()
                output, accuracy = classifier.validate(input_all, label_all)
                print('validation accuracy is {} after training steps {}'.
                      format(accuracy, int(step_tag)))

                # output after gate operation
                gate = filter_gate()
                print(gate.stop_loss_gate(output, label_all))

        elif args.mode == 2:
            # ---- test mode: not implemented ----
            pass
Ejemplo n.º 2
0
import tensorflow as tf
import numpy as np
import pandas as pd
import preprocessing
from preprocessing import SF_map, ism_data
from utility import *
import random
import re

# ismlist = preprocessing.ism_list('./ismdatabak_training')
# # logit_labels = ismlist.ism_logit_and_labels('./ismdatabak_training/ismlist.20180313.1')
# logit_labels = ismlist.logit_and_labels()
# ismfactor = preprocessing.ism_factor('./ismdatabak_training')
# std_factor = ismfactor.standard_factor()
# Quick manual check: build the training data set, draw one batch and show
# its factor and label parts.
# NOTE(review): the positional 10 is presumably the `interval` argument --
# confirm against preprocessing.ism_data.
ismdata = preprocessing.ism_data('./ismdatabak_training', 10)
logit_batch, factor_batch, label_batch = ismdata.next_batch()
for _shown in (factor_batch, label_batch):
    print(_shown)



Ejemplo n.º 3
0
def main(args):
    """
    Run the cascading DNN classifier in the mode selected by ``args.mode``.

    Modes:
        0 -- training. Trains for ``args.num_epoch`` epochs. For every epoch
             index greater than 10 a checkpoint is saved in
             ``args.model_path`` and ``[epoch, loss, test accuracy]`` is
             recorded; the records are written to ``index.csv`` there.
        1 -- validation. Restores each checkpoint found in
             ``args.model_path`` and reports its validation accuracy and
             the four values returned by ``stop_loss_gate``.
        2 -- testing. Scores each checkpoint, merges the scores with
             ``index.csv`` into ``index_all.csv``, selects one epoch by a
             weighted rank and writes its filtered codes to
             ``filter_ismcode.txt`` in ``args.ismcode_output_path``.

    Any other mode value opens the session and does nothing.

    Args:
        args: object exposing the attributes read below (``mode``,
            ``interval``, ``batch_size``, ``Rc``, ``learning_rate_base``,
            ``num_epoch``, ``model_path``, the ``ism_*_path`` entries and
            ``ismcode_output_path``).

    Raises:
        ValueError: in mode 2, when no restored checkpoint matches an epoch
            recorded in ``index.csv`` (nothing to choose from).
    """
    # logger
    logger = logging.getLogger('ism_logger')
    # file-name pattern of checkpoint data files; group 1 is the global step
    ckpt_pattern = r'model\.ckpt\-(\d*)\.data.*'

    with tf.Session() as sess:

        # training mode
        if args.mode == 0:
            logger.info('cascading DNN classifier, training mode begins ...')
            ism_data = preprocessing.ism_data(ism_path=args.ism_training_path,
                                              interval=args.interval,
                                              batch_size=args.batch_size,
                                              scaler_ls=None,
                                              Rc=args.Rc,
                                              mode=0)
            # define cascade-DNN classifier
            N_logit = ism_data.N_logit
            N_factor = ism_data.N_factor
            num_train_set = ism_data.num_data
            classifier = ISMClassifier(sess, N_logit, N_factor,
                                       args.learning_rate_base, num_train_set,
                                       args.batch_size)
            classifier.initialize()
            # checkpoint directory
            if not os.path.exists(args.model_path):
                os.mkdir(args.model_path)
            else:  # clear directory
                del_file(args.model_path)

            # iterate on epoch
            num_steps = int(num_train_set / args.batch_size)
            index = []
            for ep in range(args.num_epoch):
                # iterate on batches
                for step in range(num_steps):
                    logit_batch, factor_batch, label_batch = ism_data.next_batch(
                    )
                    loss, accuracy, _ = classifier.train(
                        logit_batch, factor_batch, label_batch)
                # loss/accuracy reported here are those of the last batch
                print('epoch-{}: loss {}, accuracy {}'.format(
                    ep, loss, accuracy))
                logger.info(
                    'epoch-{0:n}: loss {1:.4f}, accuracy {2:.3f}'.format(
                        ep, loss, accuracy))
                # save model (only once past the first epochs)
                if ep > 10:
                    logit_test, factor_test, label_test = ism_data.next_batch(
                        batch_size=2000)
                    accuracy_test = classifier.accuracy_test(
                        logit_test, factor_test, label_test)
                    classifier.saver.save(sess,
                                          os.path.join(args.model_path,
                                                       'model.ckpt'),
                                          global_step=ep)
                    index.append([ep, loss, accuracy_test])
            index = pd.DataFrame(index, columns=['epoch', 'loss', 'accuracy'])
            index.to_csv(os.path.join(args.model_path, 'index.csv'),
                         index=None)

        # validation mode
        elif args.mode == 1:
            logger.info('cascading DNN classifier, validation mode begins ...')
            scaler_ls = joblib.load('./standard_scaler_list.pkl')
            ism_data = preprocessing.ism_data(
                ism_path=args.ism_validation_path,
                interval=args.interval,
                batch_size=args.batch_size,
                scaler_ls=scaler_ls,
                Rc=args.Rc,
                mode=1)
            # define cascade-DNN classifier
            N_logit = ism_data.N_logit
            N_factor = ism_data.N_factor
            num_train_set = ism_data.num_data
            classifier = ISMClassifier(sess, N_logit, N_factor,
                                       args.learning_rate_base, num_train_set,
                                       args.batch_size)
            # restore model
            # A checkpoint may consist of several ".data-*" shard files; each
            # step is handled once no matter how many shards it has.
            seen_steps = set()
            for file_name in os.listdir(args.model_path):
                f = re.match(ckpt_pattern, file_name)
                if f is None or f.group(1) in seen_steps:
                    continue
                seen_steps.add(f.group(1))
                classifier.saver.restore(
                    sess,
                    os.path.join(args.model_path,
                                 'model.ckpt-' + f.group(1)))
                logit_all, factor_all, label_all = ism_data.validation_set()
                output, accuracy = classifier.validate(
                    logit_all, factor_all, label_all)
                print('validation accuracy is {} after training steps {}'.
                      format(accuracy, int(f.group(1))))
                # output of gate operation: computed once, then both printed
                # and logged
                gate = filter_gate()
                gate_stats = gate.stop_loss_gate(output, label_all)
                x1, x2, x3, x4 = gate_stats
                print(gate_stats)
                logger.info(
                    'validation accuracy is {0:.3f} for model-{1:n}, original win-ratio = {2:.3f} and num = {3:n}, and filtering win-ratio = {4:.3f} and num = {5:n}'
                    .format(accuracy, int(f.group(1)), x1, x2, x3, x4))

        # testing mode
        elif args.mode == 2:
            logger.info('cascading DNN classifier, evaluation mode begins ...')
            scaler_ls = joblib.load('./standard_scaler_list.pkl')
            ism_data = preprocessing.ism_data(
                ism_path=args.ism_evaluation_path,
                interval=args.interval,
                batch_size=args.batch_size,
                scaler_ls=scaler_ls,
                Rc=args.Rc,
                mode=2)
            # define cascade-DNN classifier
            N_logit = ism_data.N_logit
            N_factor = ism_data.N_factor
            num_train_set = ism_data.num_data
            classifier = ISMClassifier(sess, N_logit, N_factor,
                                       args.learning_rate_base, num_train_set,
                                       args.batch_size)
            # per-epoch loss/accuracy recorded by training mode
            index = pd.read_csv(os.path.join(args.model_path, 'index.csv'))
            residue = []
            # check output directory
            if not os.path.exists(args.ismcode_output_path):
                os.mkdir(args.ismcode_output_path)
            else:
                del_file(args.ismcode_output_path)

            # Score every checkpoint once (shard files of the same step would
            # otherwise produce duplicate rows in the merge below).
            seen_steps = set()
            for file_name in os.listdir(args.model_path):
                f = re.match(ckpt_pattern, file_name)
                if f is None or f.group(1) in seen_steps:
                    continue
                seen_steps.add(f.group(1))
                classifier.saver.restore(
                    sess,
                    os.path.join(args.model_path,
                                 'model.ckpt-' + f.group(1)))
                logit_all, factor_all, ismcode_all = ism_data.evaluation_set()
                output, ismcode = classifier.evaluate(
                    logit_all, factor_all, ismcode_all)
                # output of gate operation
                gate = filter_gate()
                code_ls = gate.filter_output(output, ismcode)
                # share of outputs that is left after filtering
                residue.append(
                    [int(f.group(1)),
                     float(len(code_ls) / len(output))])

            # choose the best model to generate
            residue = pd.DataFrame(residue, columns=['epoch', 'residue_ratio'])
            index_all = pd.merge(index, residue, on='epoch')
            index_all = index_all.sort_values(by='epoch').reset_index()
            index_all.to_csv(os.path.join(args.model_path, 'index_all.csv'),
                             index=None)
            if index_all.empty:
                raise ValueError(
                    'no checkpoint in {} matches an epoch recorded in '
                    'index.csv'.format(args.model_path))

            # Per-row 0-based ranks, where rank 0 is the best candidate.
            # Series.rank() is used instead of Series.argsort(): argsort
            # returns the positions that would sort the column, not the rank
            # of each row, so it cannot be combined row-wise.
            # NaN entries are ranked last so a diverged model is never chosen.
            index_ranking = np.zeros((len(index_all), 4), dtype=np.float64)
            # lower loss is better
            index_ranking[:, 0] = index_all['loss'].rank(
                method='first', na_option='bottom').values - 1
            # higher accuracy is better, hence the descending rank
            index_ranking[:, 1] = index_all['accuracy'].rank(
                method='first', na_option='bottom',
                ascending=False).values - 1
            # NOTE(review): a lower residue ratio is treated as better, which
            # keeps the original ordering direction -- confirm this is intended.
            index_ranking[:, 2] = index_all['residue_ratio'].rank(
                method='first', na_option='bottom').values - 1
            # weighted composite score; the smallest value wins
            index_ranking[:, 3] = (0.4 * index_ranking[:, 0] +
                                   0.4 * index_ranking[:, 1] +
                                   0.2 * index_ranking[:, 2])
            best_row = int(np.argmin(index_ranking[:, 3]))
            mode_id = index_all['epoch'].iloc[best_row]

            # regenerate the filtered codes with the chosen checkpoint
            classifier.saver.restore(
                sess,
                os.path.join(args.model_path, 'model.ckpt-' + str(mode_id)))
            logit_all, factor_all, ismcode_all = ism_data.evaluation_set()
            output, ismcode = classifier.evaluate(logit_all, factor_all,
                                                  ismcode_all)
            # output of gate operation
            gate = filter_gate()
            code_ls = gate.filter_output(output, ismcode)
            np.savetxt(os.path.join(args.ismcode_output_path,
                                    'filter_ismcode.txt'),
                       code_ls,
                       fmt='%s')
            print('the chosen best model is model-{}'.format(mode_id))
            logger.info('the chosen best model is model-{}'.format(mode_id))
def main(args):
    """
    Entry point of the cascade-DNN classifier fed with sparse-style batches.

    ``args.mode`` selects what happens inside a single TensorFlow session:

    * ``0`` -- train the classifier, printing loss/accuracy every 1000 steps
      and writing a checkpoint to ``args.model_path`` every 10000 steps.
    * ``1`` -- load the scaler list from ``./standard_scaler_list.pkl``,
      restore each checkpoint found in ``args.model_path`` and print its
      validation accuracy followed by the ``stop_loss_gate`` output.
    * ``2`` -- placeholder for testing mode; does nothing.
    """
    with tf.Session() as sess:

        if args.mode == 0:
            # ---- training mode ----
            dataset = preprocessing.ism_data(
                ism_path=args.ism_training_path,
                interval=args.interval,
                batch_size=args.batch_size,
                scaler_ls=None,
                Rc=args.Rc,
                mode=0)

            # the cascade-DNN classifier is sized from the data set
            logit_dim = dataset.N_logit
            factor_dim = dataset.N_factor
            sample_count = dataset.num_data
            classifier = ISMClassifier(
                sess, logit_dim, factor_dim, args.learning_rate_base,
                sample_count, args.batch_size)
            classifier.initialize()

            # checkpoint directory: an existing one is handed to del_file,
            # a missing one is created
            if os.path.exists(args.model_path):
                del_file(args.model_path)
            else:
                os.mkdir(args.model_path)

            for epoch in range(args.num_epoch):
                for step in range(args.train_steps):
                    indices, idsval, shape, factor, labels = \
                        dataset.next_batch()
                    loss, accuracy, _ = classifier.train(
                        indices, idsval, shape, factor, labels)

                    # progress report
                    if step % 1000 == 0:
                        message = 'epoch-{},step-{}: loss {}, accuracy {}'.format(
                            epoch, step, loss, accuracy)
                        print(message)

                    # periodic checkpoint, tagged with the in-epoch step
                    if (step + 1) % 10000 == 0:
                        ckpt_prefix = os.path.join(args.model_path,
                                                   'model.ckpt')
                        classifier.saver.save(sess, ckpt_prefix,
                                              global_step=step)

        elif args.mode == 1:
            # ---- validation mode ----
            scaler_ls = joblib.load('./standard_scaler_list.pkl')
            dataset = preprocessing.ism_data(
                ism_path=args.ism_validation_path,
                interval=args.interval,
                batch_size=args.batch_size,
                scaler_ls=scaler_ls,
                Rc=args.Rc,
                mode=1)

            logit_dim = dataset.N_logit
            factor_dim = dataset.N_factor
            sample_count = dataset.num_data
            classifier = ISMClassifier(
                sess, logit_dim, factor_dim, args.learning_rate_base,
                sample_count, args.batch_size)

            # every "model.ckpt-<step>.data*" file identifies one checkpoint
            ckpt_pattern = re.compile(r'model\.ckpt\-(\d*)\.data.*')
            for file_name in os.listdir(args.model_path):
                found = ckpt_pattern.match(file_name)
                if found is None:
                    continue

                step_tag = found.group(1)
                ckpt_path = os.path.join(args.model_path,
                                         'model.ckpt-' + step_tag)
                classifier.saver.restore(sess, ckpt_path)

                indices, idsval, shape, factor, labels = \
                    dataset.validation_set()
                output, accuracy = classifier.validate(
                    indices, idsval, shape, factor, labels)
                print('validation accuracy is {} after training steps {}'.
                      format(accuracy, int(step_tag)))

                # output of gate operation
                gate = filter_gate()
                print(gate.stop_loss_gate(output, labels))

        elif args.mode == 2:
            # ---- testing mode: not implemented ----
            pass
Ejemplo n.º 5
0
# label = ism_list('./ismdatabak_training', Rc=0.0)
# df1 = label.labels()
# print(df1)
# df1.to_csv('./ism_label.csv', header=None, index=None)

# ls = []
# cnt = 0
# for file_name in os.listdir('./ismdatabak_training'):
#     cnt += 1
#     f = re.match(r'ismfactors\.(\d{4})-?(\d{2})-?(\d{2}).*', file_name)
#     if f is None:
#         continue
#     else:
#         f2 = os.path.join('ismdatabak_training',
#                           re.sub('ismfactors', 'ismlist', f.group()))
#         if not os.path.isfile(f2):
#             continue
#         else:
#             ls.append(f2)
#
# print(len(ls), cnt)

# Quick manual check: load the stored scaler list, build the validation data
# set with it and show the validation inputs.
scaler_ls = joblib.load('standard_scaler_list.pkl')
_validation_options = dict(batch_size=10, scaler_ls=scaler_ls, mode=1)
ism = ism_data('./ismdatabak_validation', **_validation_options)
inputs, labels = ism.validation_set()
print(inputs)