def _checkpoint_steps(model_path):
    """Return the sorted, de-duplicated checkpoint steps found in ``model_path``.

    A checkpoint is recognised by a file named ``model.ckpt-<step>.data*``.
    The steps are gathered in a set because one checkpoint can be backed by
    more than one ``.data-*`` file, and each checkpoint must be visited once.
    At least one digit is required so that ``int()`` can never be handed an
    empty string.
    """
    pattern = re.compile(r'model\.ckpt-(\d+)\.data')
    steps = set()
    for file_name in os.listdir(model_path):
        found = pattern.match(file_name)
        if found is not None:
            steps.add(int(found.group(1)))
    return sorted(steps)


def main(args):
    """Train (``args.mode == 0``) or validate (``args.mode == 1``) the classifier.

    Attributes read from ``args``: ``mode``, ``interval``, ``Rc``,
    ``batch_size``, ``learning_rate_base``, ``model_path`` and, depending on
    the mode, ``ism_training_path``, ``num_epoch``, ``train_steps`` or
    ``ism_validation_path``.

    Training mode prints loss/accuracy every 1000 steps and writes a
    checkpoint every 10000 steps into ``args.model_path``.
    Validation mode restores every checkpoint found in ``args.model_path``
    in ascending step order and prints its accuracy plus the result of the
    stop-loss gate.
    Test mode (``args.mode == 2``) is not implemented.
    """
    with tf.Session() as sess:
        # training mode
        if args.mode == 0:
            ism_data = preprocessing.ism_data(ism_path=args.ism_training_path,
                                              interval=args.interval,
                                              Rc=args.Rc,
                                              batch_size=args.batch_size,
                                              mode=0)
            # define DNN classifier
            N_feature = ism_data.N_feature
            num_train_set = ism_data.num_data
            classifier = ISMClassifier(sess, N_feature,
                                       args.learning_rate_base,
                                       num_train_set, args.batch_size)
            classifier.initialize()

            # checkpoint directory: create it (including missing parents) or
            # hand an existing one to del_file (presumably clears it).
            if not os.path.exists(args.model_path):
                os.makedirs(args.model_path)
            else:
                del_file(args.model_path)
            checkpoint_prefix = os.path.join(args.model_path, 'model.ckpt')

            # iterate on epoch
            for ep in range(args.num_epoch):
                # iterate on batches
                for step in range(args.train_steps):
                    input_batch, label_batch = ism_data.next_batch()
                    loss, accuracy, _ = classifier.train(
                        input_batch, label_batch)
                    if step % 1000 == 0:
                        print('epoch-{},step-{}: loss {}, accuracy {}'.format(
                            ep, step, loss, accuracy))
                    if (step + 1) % 10000 == 0:
                        # `step` restarts at 0 every epoch; tag the checkpoint
                        # with the step counted over all epochs so that later
                        # epochs do not overwrite earlier checkpoints.
                        classifier.saver.save(
                            sess, checkpoint_prefix,
                            global_step=ep * args.train_steps + step)

        # validation mode
        elif args.mode == 1:
            ism_data = preprocessing.ism_data(
                ism_path=args.ism_validation_path,
                interval=args.interval,
                Rc=args.Rc,
                batch_size=args.batch_size,
                mode=1)
            # define DNN classifier
            N_feature = ism_data.N_feature
            num_train_set = ism_data.num_data
            classifier = ISMClassifier(sess, N_feature,
                                       args.learning_rate_base,
                                       num_train_set, args.batch_size)

            for trained_steps in _checkpoint_steps(args.model_path):
                classifier.saver.restore(
                    sess,
                    os.path.join(args.model_path,
                                 'model.ckpt-{}'.format(trained_steps)))
                input_all, label_all = ism_data.validation_set()
                output, accuracy = classifier.validate(input_all, label_all)
                print('validation accuracy is {} after training steps {}'.
                      format(accuracy, trained_steps))
                # output after gate operation
                gate = filter_gate()
                print(gate.stop_loss_gate(output, label_all))

        # test mode
        elif args.mode == 2:
            pass
import tensorflow as tf
import numpy as np
import pandas as pd
import preprocessing
from preprocessing import SF_map, ism_data
from utility import *
import random
import re

# Earlier experiments, kept for reference:
# ismlist = preprocessing.ism_list('./ismdatabak_training')
# # logit_labels = ismlist.ism_logit_and_labels('./ismdatabak_training/ismlist.20180313.1')
# logit_labels = ismlist.logit_and_labels()
# ismfactor = preprocessing.ism_factor('./ismdatabak_training')
# std_factor = ismfactor.standard_factor()

# Smoke test: build the training data set and print the factors and labels
# of the first batch it yields.
ismdata = preprocessing.ism_data(
    './ismdatabak_training',
    10,
)
logit_batch, factor_batch, label_batch = ismdata.next_batch()
print(factor_batch)
print(label_batch)
def _checkpoint_steps(model_path):
    """Return the sorted, de-duplicated checkpoint steps found in ``model_path``.

    A checkpoint is recognised by a file named ``model.ckpt-<step>.data*``.
    The steps are gathered in a set because one checkpoint can be backed by
    more than one ``.data-*`` file, and each checkpoint must be visited once.
    At least one digit is required so that ``int()`` can never be handed an
    empty string.
    """
    pattern = re.compile(r'model\.ckpt-(\d+)\.data')
    steps = set()
    for file_name in os.listdir(model_path):
        found = pattern.match(file_name)
        if found is not None:
            steps.add(int(found.group(1)))
    return sorted(steps)


def _select_best_epoch(index_all):
    """Return the epoch with the lowest weighted rank score.

    ``index_all`` needs the columns ``epoch``, ``loss``, ``accuracy`` and
    ``residue_ratio``. Each metric is converted to a rank and the ranks are
    combined with the weights 0.4 (loss), 0.4 (accuracy), 0.2 (residue ratio).

    Raises:
        ValueError: if ``index_all`` has no rows.
    """
    if len(index_all) == 0:
        raise ValueError(
            'no checkpoint matches an entry of index.csv, cannot choose a model')
    # Series.rank() yields the rank of every row. argsort() must not be used
    # here: it yields the row order that would sort the column, which is a
    # different permutation. NaN metrics are ranked worst.
    loss_rank = index_all['loss'].rank(
        method='min', na_option='bottom').values
    # The lowest score wins, so the highest accuracy must get the lowest rank.
    accuracy_rank = index_all['accuracy'].rank(
        method='min', ascending=False, na_option='bottom').values
    # NOTE(review): a smaller residue ratio is treated as better, as in the
    # original weighting - confirm this is the intended direction.
    residue_rank = index_all['residue_ratio'].rank(
        method='min', na_option='bottom').values
    score = 0.4 * loss_rank + 0.4 * accuracy_rank + 0.2 * residue_rank
    return int(index_all['epoch'].iloc[int(np.argmin(score))])


def main(args):
    """Train, validate or evaluate the cascade-DNN classifier.

    ``args.mode`` selects the behaviour:

    * ``0`` - training. After every epoch the loss/accuracy of the last batch
      is printed and logged. From epoch 11 on, a checkpoint tagged with the
      epoch number is saved and ``[epoch, loss, test accuracy]`` is recorded;
      the records are written to ``index.csv`` in ``args.model_path``.
    * ``1`` - validation. Every checkpoint in ``args.model_path`` is restored
      in ascending order and its accuracy and stop-loss gate result are
      printed and logged.
    * ``2`` - evaluation. Every checkpoint is scored, the scores are merged
      with ``index.csv`` into ``index_all.csv``, the best epoch is chosen and
      its filtered codes are written to ``filter_ismcode.txt`` in
      ``args.ismcode_output_path``.

    Modes 1 and 2 load the scaler list from ``./standard_scaler_list.pkl``.

    Raises:
        ValueError: in training mode when the data set is smaller than one
            batch; in evaluation mode when no checkpoint matches ``index.csv``.
    """
    # logger
    logger = logging.getLogger('ism_logger')

    with tf.Session() as sess:
        # training mode
        if args.mode == 0:
            logger.info('cascading DNN classifier, training mode begins ...')
            ism_data = preprocessing.ism_data(ism_path=args.ism_training_path,
                                              interval=args.interval,
                                              batch_size=args.batch_size,
                                              scaler_ls=None,
                                              Rc=args.Rc,
                                              mode=0)
            # define cascade-DNN classifier
            N_logit = ism_data.N_logit
            N_factor = ism_data.N_factor
            num_train_set = ism_data.num_data
            classifier = ISMClassifier(sess, N_logit, N_factor,
                                       args.learning_rate_base,
                                       num_train_set, args.batch_size)
            classifier.initialize()

            num_steps = int(num_train_set / args.batch_size)
            if num_steps < 1:
                # Without a single batch `loss`/`accuracy` would never be
                # assigned; fail before the model directory is cleared.
                raise ValueError(
                    'training set ({} samples) is smaller than one batch ({})'
                    .format(num_train_set, args.batch_size))

            # checkpoint directory
            if not os.path.exists(args.model_path):
                os.makedirs(args.model_path)
            else:
                # clear directory
                del_file(args.model_path)
            checkpoint_prefix = os.path.join(args.model_path, 'model.ckpt')

            # iterate on epoch
            history = []
            for ep in range(args.num_epoch):
                # iterate on batches
                for step in range(num_steps):
                    logit_batch, factor_batch, label_batch = \
                        ism_data.next_batch()
                    loss, accuracy, _ = classifier.train(
                        logit_batch, factor_batch, label_batch)
                # loss/accuracy below are those of the epoch's last batch
                print('epoch-{}: loss {}, accuracy {}'.format(
                    ep, loss, accuracy))
                logger.info(
                    'epoch-{0:n}: loss {1:.4f}, accuracy {2:.3f}'.format(
                        ep, loss, accuracy))
                # save model
                if ep > 10:
                    logit_test, factor_test, label_test = ism_data.next_batch(
                        batch_size=2000)
                    accuracy_test = classifier.accuracy_test(
                        logit_test, factor_test, label_test)
                    classifier.saver.save(sess, checkpoint_prefix,
                                          global_step=ep)
                    history.append([ep, loss, accuracy_test])
            history = pd.DataFrame(history,
                                   columns=['epoch', 'loss', 'accuracy'])
            history.to_csv(os.path.join(args.model_path, 'index.csv'),
                           index=None)

        # validation mode
        elif args.mode == 1:
            logger.info('cascading DNN classifier, validation mode begins ...')
            scaler_ls = joblib.load('./standard_scaler_list.pkl')
            ism_data = preprocessing.ism_data(
                ism_path=args.ism_validation_path,
                interval=args.interval,
                batch_size=args.batch_size,
                scaler_ls=scaler_ls,
                Rc=args.Rc,
                mode=1)
            # define cascade-DNN classifier
            N_logit = ism_data.N_logit
            N_factor = ism_data.N_factor
            num_train_set = ism_data.num_data
            classifier = ISMClassifier(sess, N_logit, N_factor,
                                       args.learning_rate_base,
                                       num_train_set, args.batch_size)
            # restore model
            for epoch in _checkpoint_steps(args.model_path):
                classifier.saver.restore(
                    sess,
                    os.path.join(args.model_path,
                                 'model.ckpt-{}'.format(epoch)))
                logit_all, factor_all, label_all = ism_data.validation_set()
                output, accuracy = classifier.validate(
                    logit_all, factor_all, label_all)
                print('validation accuracy is {} after training steps {}'.
                      format(accuracy, epoch))
                # output of gate operation; evaluated once and reused for
                # both the console output and the log record
                gate = filter_gate()
                gate_result = gate.stop_loss_gate(output, label_all)
                x1, x2, x3, x4 = gate_result
                print(gate_result)
                logger.info(
                    'validation accuracy is {0:.3f} for model-{1:n}, original win-ratio = {2:.3f} and num = {3:n}, and filtering win-ratio = {4:.3f} and num = {5:n}'
                    .format(accuracy, epoch, x1, x2, x3, x4))

        # testing mode
        elif args.mode == 2:
            logger.info('cascading DNN classifier, evaluation mode begins ...')
            scaler_ls = joblib.load('./standard_scaler_list.pkl')
            ism_data = preprocessing.ism_data(
                ism_path=args.ism_evaluation_path,
                interval=args.interval,
                batch_size=args.batch_size,
                scaler_ls=scaler_ls,
                Rc=args.Rc,
                mode=2)
            # define cascade-DNN classifier
            N_logit = ism_data.N_logit
            N_factor = ism_data.N_factor
            num_train_set = ism_data.num_data
            classifier = ISMClassifier(sess, N_logit, N_factor,
                                       args.learning_rate_base,
                                       num_train_set, args.batch_size)
            # training record written by training mode
            index = pd.read_csv(os.path.join(args.model_path, 'index.csv'))

            # check output directory
            if not os.path.exists(args.ismcode_output_path):
                os.makedirs(args.ismcode_output_path)
            else:
                del_file(args.ismcode_output_path)

            # share of codes that pass the gate, per checkpoint
            residue = []
            for epoch in _checkpoint_steps(args.model_path):
                classifier.saver.restore(
                    sess,
                    os.path.join(args.model_path,
                                 'model.ckpt-{}'.format(epoch)))
                logit_all, factor_all, ismcode_all = ism_data.evaluation_set()
                output, ismcode = classifier.evaluate(
                    logit_all, factor_all, ismcode_all)
                # output of gate operation
                gate = filter_gate()
                code_ls = gate.filter_output(output, ismcode)
                num_output = len(output)
                # float() before dividing keeps the ratio fractional on
                # Python 2; an empty output counts as ratio 0.
                ratio = float(len(code_ls)) / num_output if num_output else 0.0
                residue.append([epoch, ratio])

            # choose the best model to generate
            residue = pd.DataFrame(residue, columns=['epoch', 'residue_ratio'])
            index_all = pd.merge(index, residue, on='epoch')
            index_all = index_all.sort_values(by='epoch').reset_index()
            index_all.to_csv(os.path.join(args.model_path, 'index_all.csv'),
                             index=None)
            mode_id = _select_best_epoch(index_all)

            classifier.saver.restore(
                sess,
                os.path.join(args.model_path, 'model.ckpt-' + str(mode_id)))
            logit_all, factor_all, ismcode_all = ism_data.evaluation_set()
            output, ismcode = classifier.evaluate(logit_all, factor_all,
                                                  ismcode_all)
            # output of gate operation
            gate = filter_gate()
            code_ls = gate.filter_output(output, ismcode)
            np.savetxt(os.path.join(args.ismcode_output_path,
                                    'filter_ismcode.txt'),
                       code_ls, fmt='%s')
            print('the chosen best model is model-{}'.format(mode_id))
            logger.info('the chosen best model is model-{}'.format(mode_id))
def _checkpoint_steps(model_path):
    """Return the sorted, de-duplicated checkpoint steps found in ``model_path``.

    A checkpoint is recognised by a file named ``model.ckpt-<step>.data*``.
    The steps are gathered in a set because one checkpoint can be backed by
    more than one ``.data-*`` file, and each checkpoint must be visited once.
    At least one digit is required so that ``int()`` can never be handed an
    empty string.
    """
    pattern = re.compile(r'model\.ckpt-(\d+)\.data')
    steps = set()
    for file_name in os.listdir(model_path):
        found = pattern.match(file_name)
        if found is not None:
            steps.add(int(found.group(1)))
    return sorted(steps)


def main(args):
    """Train (``args.mode == 0``) or validate (``args.mode == 1``) the cascade-DNN.

    Attributes read from ``args``: ``mode``, ``interval``, ``Rc``,
    ``batch_size``, ``learning_rate_base``, ``model_path`` and, depending on
    the mode, ``ism_training_path``, ``num_epoch``, ``train_steps`` or
    ``ism_validation_path``.

    Training mode prints loss/accuracy every 1000 steps and writes a
    checkpoint every 10000 steps into ``args.model_path``.
    Validation mode loads the scaler list from
    ``./standard_scaler_list.pkl``, restores every checkpoint found in
    ``args.model_path`` in ascending step order and prints its accuracy plus
    the result of the stop-loss gate.
    Testing mode (``args.mode == 2``) is not implemented.
    """
    with tf.Session() as sess:
        # training mode
        if args.mode == 0:
            ism_data = preprocessing.ism_data(ism_path=args.ism_training_path,
                                              interval=args.interval,
                                              batch_size=args.batch_size,
                                              scaler_ls=None,
                                              Rc=args.Rc,
                                              mode=0)
            # define cascade-DNN classifier
            N_logit = ism_data.N_logit
            N_factor = ism_data.N_factor
            num_train_set = ism_data.num_data
            classifier = ISMClassifier(sess, N_logit, N_factor,
                                       args.learning_rate_base,
                                       num_train_set, args.batch_size)
            classifier.initialize()

            # checkpoint directory
            if not os.path.exists(args.model_path):
                os.makedirs(args.model_path)
            else:
                # clear directory
                del_file(args.model_path)
            checkpoint_prefix = os.path.join(args.model_path, 'model.ckpt')

            # iterate on epoch
            for ep in range(args.num_epoch):
                # iterate on batches
                for step in range(args.train_steps):
                    indices, idsval, shape, factor, labels = \
                        ism_data.next_batch()
                    loss, accuracy, _ = classifier.train(
                        indices, idsval, shape, factor, labels)
                    if step % 1000 == 0:
                        print('epoch-{},step-{}: loss {}, accuracy {}'.format(
                            ep, step, loss, accuracy))
                    if (step + 1) % 10000 == 0:
                        # `step` restarts at 0 every epoch; tag the checkpoint
                        # with the step counted over all epochs so that later
                        # epochs do not overwrite earlier checkpoints.
                        classifier.saver.save(
                            sess, checkpoint_prefix,
                            global_step=ep * args.train_steps + step)

        # validation mode
        elif args.mode == 1:
            scaler_ls = joblib.load('./standard_scaler_list.pkl')
            ism_data = preprocessing.ism_data(
                ism_path=args.ism_validation_path,
                interval=args.interval,
                batch_size=args.batch_size,
                scaler_ls=scaler_ls,
                Rc=args.Rc,
                mode=1)
            # define cascade-DNN classifier
            N_logit = ism_data.N_logit
            N_factor = ism_data.N_factor
            num_train_set = ism_data.num_data
            classifier = ISMClassifier(sess, N_logit, N_factor,
                                       args.learning_rate_base,
                                       num_train_set, args.batch_size)
            # restore model
            for trained_steps in _checkpoint_steps(args.model_path):
                classifier.saver.restore(
                    sess,
                    os.path.join(args.model_path,
                                 'model.ckpt-{}'.format(trained_steps)))
                indices, idsval, shape, factor, labels = \
                    ism_data.validation_set()
                output, accuracy = classifier.validate(
                    indices, idsval, shape, factor, labels)
                print('validation accuracy is {} after training steps {}'.
                      format(accuracy, trained_steps))
                # output of gate operation
                gate = filter_gate()
                print(gate.stop_loss_gate(output, labels))

        # testing mode
        elif args.mode == 2:
            pass
# Earlier experiments, kept for reference:
# label = ism_list('./ismdatabak_training', Rc=0.0)
# df1 = label.labels()
# print(df1)
# df1.to_csv('./ism_label.csv', header=None, index=None)
# ls = []
# cnt = 0
# for file_name in os.listdir('./ismdatabak_training'):
#     cnt += 1
#     f = re.match(r'ismfactors\.(\d{4})-?(\d{2})-?(\d{2}).*', file_name)
#     if f is None:
#         continue
#     else:
#         f2 = os.path.join('ismdatabak_training',
#                           re.sub('ismfactors', 'ismlist', f.group()))
#         if not os.path.isfile(f2):
#             continue
#         else:
#             ls.append(f2)
#
# print(len(ls), cnt)

# Smoke test: load the stored scaler list, build the validation data set
# with it and print the inputs of the validation set.
scaler_ls = joblib.load('standard_scaler_list.pkl')
ism = ism_data(
    './ismdatabak_validation',
    batch_size=10,
    scaler_ls=scaler_ls,
    mode=1,
)
inputs, labels = ism.validation_set()
print(inputs)