예제 #1
0
 def prediction(self):
     """Restore the trained detector and print predictions for test images.

     Loads ./trained_model/bullymodel.ckpt if present, runs inference on a
     batch of test images fetched via get_data, and prints each image's
     ground-truth annotations next to the predicted class, class score and
     location.  Prints a message and does nothing if no checkpoint exists.
     """
     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
     with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
         my_net = OdNet(sess, self.parameters)
         sess.run(tf.global_variables_initializer())
         saver = tf.train.Saver(var_list=tf.trainable_variables())
         if os.path.exists('./trained_model/bullymodel.ckpt.index'):
             saver.restore(sess, './trained_model/bullymodel.ckpt')
             # Load test images and their ground-truth annotations.
             image, actual, file_list = get_data(self.parameters.test_num,
                                                 self.parameters.img_path,
                                                 self.parameters.xml_path)
             # Inference only: no labels are fed at prediction time.
             pred_class, pred_class_val, pred_location = my_net.prediction_run(image, None)
             print('images for prediction:' + str(file_list))

             # assumes `image` and `actual` are parallel lists — TODO confirm
             for index, act in enumerate(actual):
                 for a in act:
                     print('【img-' + str(index) + ' actual】:' + str(a))
                 msg = ("predicted class is: {0}\n "
                       "predicted class value:{1}\n"
                       "predicted location:{2}")
                 print(msg.format(pred_class[index], pred_class_val[index],
                       pred_location[index]))
         else:
             print('No trained model Exists!')
         # The `with` block closes the session; explicit sess.close() removed.
def load_split_data(data_size, test_p):
    """Load ratings and split them into a held-out test set and a train matrix.

    data_size: one of "100k", "1m", "10m" or "20m".
    test_p:    fraction in [0, 1] of the ratings held out as the test set.

    Returns (test_ratings, train_M, nb_users, nb_movies, n_train) where
    test_ratings is an array of user id | item id | rating rows and train_M
    is an nb_movies x nb_users float32 matrix with zeros for missing entries.
    """
    print("split data randomly")
    # get_data already returns the ratings permuted, so a prefix split is random.
    all_ratings = get_data(data_size)
    nb_users = int(np.max(all_ratings[:, 0]))
    nb_movies = int(np.max(all_ratings[:, 1]))
    # Hold out the first test_p fraction as the test set.
    n_test = int(len(all_ratings) * test_p)
    test_ratings, train_ratings = all_ratings[:n_test], all_ratings[n_test:]
    # Scatter the remaining ratings into a dense movies-by-users matrix;
    # ids are 1-based, hence the -1 offsets.
    train_M = np.zeros((nb_movies, nb_users), dtype=np.float32)
    for row in train_ratings:
        uid, mid, score = row[0], row[1], row[2]
        train_M[int(mid - 1), int(uid - 1)] = score
    # Persist this particular split so later runs can resume training on it.
    split_tag = str(int(test_p * 100))
    np.save("Data/" + data_size + "_" + split_tag +
            "percent_test.npy", test_ratings)
    np.save("Data/" + data_size + "_" + split_tag +
            "percent_trainM.npy", train_M)
    return test_ratings, train_M, nb_users, nb_movies, len(train_ratings)
예제 #3
0
    def training(self):
        """Train the detection network, checkpointing every 100 steps.

        Restores ./session_params/session.ckpt if it exists, then loops
        until either the sum of the best-seen losses drops below 0.001 or
        parameters.iteration_steps batches have been processed.  Checkpoints
        are written to ./trained_model/bullymodel.ckpt.
        """
        running_count = 0
        
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            my_net= OdNet(sess,self.parameters)
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(var_list=tf.trainable_variables())
            # NOTE(review): training resumes from ./session_params but new
            # checkpoints go to ./trained_model — confirm the asymmetry is
            # intentional.
            if os.path.exists('./session_params/session.ckpt.index') :
                print('\nStart Restore')
                saver.restore(sess, './session_params/session.ckpt')
                print('\nEnd Restore')
            
            print('\n============Training start from here============')
            # Running minima of the location/class losses observed so far;
            # the loop's stop condition uses the SUM of these minima, not
            # the current batch loss.
            min_loss_location = 100000.
            min_loss_class = 100000.

            while((min_loss_location + min_loss_class) > 0.001 and running_count < self.parameters.iteration_steps):
                running_count += 1
                
                # Fetch one batch of training images plus annotations.
                train_data, actual_data,_ = get_data(self.parameters.batch_size,self.parameters.img_path,self.parameters.xml_path)
                if len(train_data) > 0:
                    loss_all,loss_class,loss_location,pred_class,pred_location = my_net.train_run(train_data, actual_data)
                    l = np.sum(loss_location)
                    c = np.sum(loss_class)
                    # Keep the lowest loss values seen across all steps.
                    if min_loss_location > l:
                        min_loss_location = l
                    if min_loss_class > c:
                        min_loss_class = c

                    # NOTE(review): the "Loss" printed below is the sum of
                    # the historical minima, not the current batch loss.
                    sum_loss= min_loss_location + min_loss_class
                    msg = ("Step {0}---Loss:[{1:>4.3}|{2:>4.3}]" 
                        " ** Location::{3:>4.3} ** Class::{4:>4.3}"
                        " ** pred_class::[{5:>4.3}|{6:>4.3}|{7:>4.3}]"
                        " ** pred_location::[{8:>4.3f}|{9:>4.3f}|{10:>4.3f}|")
                    print(msg.format(running_count, sum_loss, loss_all, np.sum(loss_location), 
                        np.sum(loss_class),np.sum(pred_class),np.amax(pred_class),
                        np.min(pred_class),np.sum(pred_location),np.amax(pred_location),
                        np.min(pred_location)))

                    # save ckpt 
                    if running_count % 100 == 0:
                        saver.save(sess, './trained_model/bullymodel.ckpt')
                        print('bullymodel.ckpt has been saved.')
                        gc.collect()
                else:
                    print('No image!,pls check')
                    break
                
            # Final checkpoint once training stops.
            saver.save(sess, './trained_model/bullymodel.ckpt')
            sess.close()
            gc.collect()
                
        print('===========Training ended==========')
예제 #4
0
def Basemodel(_model, filename, trigger_flag, evalua_flag, is_binary_class,
              evaluation_list):
    """Train and evaluate one classical baseline classifier.

    _model selects the algorithm ('1NN', '1NN-DTW', 'RF', 'SVM', 'SVMF',
    'SVMW', 'NB', 'DT' or 'Ada.Boost'); filename names the data set loaded
    via loaddata.  Per-metric results are accumulated in result_list_dict;
    when evalua_flag is True the averages are appended to a comparison log
    and nothing is returned, otherwise the metric dict of the last run is
    returned.
    """

    filepath = FLAGS.data_dir
    sequence_window = FLAGS.sequence_window
    is_multi_scale = FLAGS.is_multi_scale
    training_level = FLAGS.scale_levels
    is_add_noise = FLAGS.is_add_noise
    noise_ratio = FLAGS.noise_ratio

    result_list_dict = defaultdict(list)

    for each in evaluation_list:
        result_list_dict[each] = []
    np.random.seed(1337)  # for reproducibility
    # num_selected_features = 25#AS leak tab=0
    # num_selected_features = 32#Slammer tab=0
    #num_selected_features = 20  # Nimda tab=1
    x_train, y_train, x_val, y_val, x_test, y_test = loaddata.get_data_withoutS(
        FLAGS.pooling_type,
        FLAGS.is_add_noise,
        FLAGS.noise_ratio,
        FLAGS.data_dir,
        filename,
        FLAGS.sequence_window,
        trigger_flag,
        is_binary_class,
        multiScale=False,
        waveScale=FLAGS.scale_levels,
        waveType=FLAGS.wave_type)
    #total = np.concatenate((x_train,x_val,x_test))
    #total_label = np.concatenate((y_train,y_val,y_test))

    loaddata.multi_scale_plotting(x_train, y_train)
    # Sweep over feature counts; for most models the loop body runs only
    # once (see the break at the bottom) — only the SVMW/SVMF feature
    # selectors iterate.
    for tab_selected_features in range(33, 34):
        #for tab_selected_features in range(33):
        #hello
        if _model == '1NN':
            #x_train, y_train, x_test, y_test = ucr_load_data.load_ucr_data()
            print(_model +
                  " is running..............................................")
            clf = KNeighborsClassifier(n_neighbors=1)
        elif _model == '1NN-DTW':
            # DTW needs the raw windowed series, so reload without the
            # flattening applied by get_data_withoutS.
            x_train, y_train, x_val, y_val, x_test, y_test = loaddata.get_data(
                FLAGS.pooling_type,
                FLAGS.is_add_noise,
                FLAGS.noise_ratio,
                FLAGS.data_dir,
                filename,
                FLAGS.sequence_window,
                trigger_flag,
                is_binary_class,
                multiScale=False,
                waveScale=FLAGS.scale_levels,
                waveType=FLAGS.wave_type)
            x_train = np.transpose(x_train, [0, 2, 1])
            x_test = np.transpose(x_test, [0, 2, 1])
            print(_model +
                  " is running..............................................")

            clf = KnnDtw(n_neighbors=1)
            x_train = x_train[:, tab_selected_features]
            x_test = x_test[:, tab_selected_features]

        elif _model == 'RF':
            clf = RandomForestClassifier(n_estimators=50)
        elif _model == "SVM":
            x_train = x_train[:, [0, 1]]
            x_test = x_test[:, [0, 1]]
            print(_model +
                  " is running..............................................")
            clf = svm.SVC(kernel="rbf",
                          gamma=0.0001,
                          C=100000,
                          probability=True)

        elif _model == "SVMF":
            x_train, y_train, x_val, y_val, x_test, y_test = loaddata.get_data_withoutS(
                FLAGS.pooling_type,
                FLAGS.is_add_noise,
                FLAGS.noise_ratio,
                FLAGS.data_dir,
                filename,
                FLAGS.sequence_window,
                trigger_flag,
                is_binary_class,
                multiScale=False,
                waveScale=FLAGS.scale_levels,
                waveType=FLAGS.wave_type)
            print(_model +
                  " is running.............................................." +
                  str(tab_selected_features))
            clf = svm.SVC(kernel="rbf",
                          gamma=0.0001,
                          C=100000,
                          probability=False)
            estimator = SelectKBest(chi2, k=tab_selected_features)

            x_train_new = estimator.fit_transform(x_train, y_train)
            # NOTE(review): fit_transform on the TEST split refits the
            # selector using y_test — this leaks test labels; likely should
            # be estimator.transform(x_test).
            x_test_new = estimator.fit_transform(x_test, y_test)

            x_train = x_train_new
            x_test = x_test_new

        elif _model == "SVMW":
            print(_model +
                  " is running.............................................." +
                  str(tab_selected_features))
            #estimator = svm.SVC(kernel="rbf", gamma=0.00001, C=1000, probability=False)
            estimator = svm.SVC(kernel="linear")
            clf = RFE(estimator, tab_selected_features, step=1)

        elif _model == "NB":
            print(_model +
                  " is running..............................................")
            clf = BernoulliNB()

        elif _model == "DT":
            print(_model +
                  " is running.............................................." +
                  str(x_train.shape))
            y_train = y_train
            clf = tree.DecisionTreeClassifier()

        elif _model == "Ada.Boost":
            print(_model +
                  " is running.............................................." +
                  str(x_train.shape))
            y_train = y_train
            clf = AdaBoostClassifier(n_estimators=200)

        #visualize.curve_plotting(x_test,y_test,filename,_model)
        # NOTE(review): an unrecognized _model leaves clf undefined here
        # and this call raises NameError.
        clf.fit(x_train, y_train)
        if evalua_flag == True:
            result = clf.predict(x_test)

        else:
            # Probability outputs are compared against one-hot labels.
            y_test = loaddata.one_hot(y_test)
            result = clf.predict_proba(x_test)
            print(y_test.shape)
            print(result.shape)

        if is_binary_class == True:
            print(y_test.shape)
            print(result.shape)
            #print(evalua_flag)
            results = evaluation.evaluation(
                y_test, result, trigger_flag,
                evalua_flag)  # Computing ACCURACY,F1-score,..,etc
        else:
            # Multi-class path: compute accuracy/F1 directly and print the
            # per-class label distributions.
            accuracy = sklearn.metrics.accuracy_score(y_test, result)
            symbol_list = [0, 1, 2, 3, 4]
            confmat = confusion_matrix(y_test, result, labels=symbol_list)
            #visualize.plotConfusionMatrix(confmat)
            #symbol_list2 = [0]
            #y_= []
            #for symbol in symbol_list2:
            #   for tab in range(len(y_test)):
            #      if y_test[tab] == symbol and y_test[tab] == result[tab]:
            #         y_.append(symbol)
            #print(y_test[0:10])
            #rint(result[0:10])
            #print("Accuracy is :"+str(accuracy))
            #accuracy = float(len(y_))/(list(result).count(symbol))
            print("True is ")
            # print(y_test)
            print("The 0 of True is " + str(list(y_test).count(0)))
            print("The 1 of True is " + str(list(y_test).count(1)))
            print("The 2 of True is " + str(list(y_test).count(2)))
            print("The 3 of True is " + str(list(y_test).count(3)))
            print("The 4 of True is " + str(list(y_test).count(4)))
            # print(len(y_test))
            print("Predict is ")
            # print(result)
            print("The 0 of Predict is " + str(list(result).count(0)))
            print("The 1 of Predict is " + str(list(result).count(1)))
            print("The 2 of Predict is " + str(list(result).count(2)))
            print("The 3 of Predict is " + str(list(result).count(3)))
            print("The 4 of Predict is " + str(list(result).count(4)))
            # print(len(result))
            f1_score = sklearn.metrics.f1_score(y_test,
                                                result,
                                                average="macro")
            print("Accuracy of is :" + str(accuracy))
            print("F-score of is :" + str(f1_score))
            print("Accuracy of " + _model + "is :" + str(accuracy))
            print("F-score of " + _model + "is :" + str(f1_score))
            # AUC/G_MEAN are not computed on this path; 9999 is a sentinel.
            results = {
                'ACCURACY': accuracy,
                'F1_SCORE': f1_score,
                'AUC': 9999,
                'G_MEAN': 9999
            }
        # Collapse one-hot labels back to class ids where applicable.
        # NOTE(review): the bare except silently falls back to the raw
        # arrays — consider catching the specific exception instead.
        try:
            y_test2 = np.array(loaddata.reverse_one_hot(y_test))
            result2 = np.array(loaddata.reverse_one_hot(result))
        except:
            y_test2 = y_test
            result2 = result
        # Statistics False Alarm Rate
        with open(
                os.path.join("./stat/",
                             "StatFalseAlarm_" + filename + "_True"),
                "w") as fout:
            for tab in range(len(y_test2)):
                fout.write(str(int(y_test2[tab])) + '\n')
        with open(
                os.path.join(
                    "./stat/", "StatFalseAlarm_" + filename + "_" + _model +
                    "_" + "_Predict"), "w") as fout:
            for tab in range(len(result2)):
                fout.write(str(int(result2[tab])) + '\n')

        # NOTE(review): every metric is appended twice — once here and once
        # in the evaluation_list loop below — so the averages written to the
        # log double-count each run.
        for each_eval, each_result in results.items():
            result_list_dict[each_eval].append(each_result)

        for each_eval in evaluation_list:
            result_list_dict[each_eval].append(results[each_eval])
        #for eachk, eachv in result_list_dict.items():
        #result_list_dict[eachk] = np.average(eachv)
        if evalua_flag:
            with open(os.path.join(FLAGS.output, "Comparison_Log_" + filename),
                      "a") as fout:
                outfileline = _model + ":__" + str(tab_selected_features)
                fout.write(outfileline)
                for each_eval in evaluation_list:
                    fout.write(each_eval + ": " + str(
                        round(np.average(result_list_dict[each_eval]), 3)) +
                               ",\t")
                #for eachk, eachv in result_list_dict.items():
                #fout.write(eachk + ": " + str(round(eachv, 3)) + ",\t")
                fout.write('\n')
        #if '-' in _model:continue
        # Only the wrapper/filter selectors (SVMW/SVMF) sweep the whole
        # feature range; every other model runs once.
        if 'MW' in _model or 'MF' in _model:
            continue
        else:
            break
    if evalua_flag:
        pass
        #return epoch_training_loss_list,epoch_val_loss_list
    else:
        return results
예제 #5
0
    model.model_name = model_name
    return model


"""
def save_model(model):
    print
    print "Saving", model.model_name
    open('%s.json'%model.model_name, 'w').write(model.to_json())
    model.save_weights('%s_weights.h5' % model.model_name, overwrite=True)    
"""

if __name__ == "__main__":
    # Experiment configuration for the RNN sweep; the training loop that
    # consumes these settings is outside this excerpt.
    USE_TOP = 1e8
    batch_size = 10
    trajs, trajs_trn, observable = get_data()
    from keras.callbacks import ModelCheckpoint

    NUM_EPOCHS = 1000
    # Hidden-layer sizes to sweep.
    dims = [3, 10, 50, 100, 300, 500, 700, 1000]
    # Regularization strengths to sweep.
    regs = [
        0,
    ]
    #archs = ['rnn','rnn2','rnn_identity','rnn_init_time','rnn1']
    #archs = ['rnn3',]
    #archs = ['rnn4',]
    #archs = ['rnn4_stacked',]
    archs = [
        'rnn2',
    ]
    #archs = ['rnn2',]; dims = [2,3,5,10]
예제 #6
0
File: train.py  Project: tartaruszen/MSLSTM
def train_lstm(method, filename_train_list, filename_test, trigger_flag,
               evalua_flag, is_binary_class, result_list_dict,
               evaluation_list):
    """Build, train and evaluate one (MS)LSTM variant on BGP anomaly data.

    method selects the mslstm architecture (stored into FLAGS.option).
    Per-metric results are appended to result_list_dict; when evalua_flag
    is True the running means are also written to a TensorFlow_Log file
    and the four per-epoch accuracy/loss histories are returned, otherwise
    the metric dict is returned.
    """
    global tempstdout
    FLAGS.option = method
    dropout = 0.8
    x_train, y_train, x_val, y_val, x_test, y_test = loaddata.get_data(
        FLAGS.pooling_type,
        FLAGS.is_add_noise,
        FLAGS.noise_ratio,
        FLAGS.data_dir,
        filename_test,
        FLAGS.sequence_window,
        trigger_flag,
        is_binary_class,
        multiScale=FLAGS.is_multi_scale,
        waveScale=FLAGS.scale_levels,
        waveType=FLAGS.wave_type)
    """
    if filename_test == 'HB_AS_Leak.txt':
        filename_train = 'HB_C_N_S.txt'
    elif filename_test == 'HB_Code_Red_I.txt':
        filename_train = 'HB_A_N_S.txt'
    elif filename_test == 'HB_Nimda.txt':
        filename_train = 'HB_A_C_S.txt'
    elif filename_test == 'HB_Slammer.txt':
        filename_train = 'HB_A_C_N.txt'
    print(filename_test)
    #x_train, y_train, x_val, y_val = loaddata.get_trainData(FLAGS.pooling_type, FLAGS.is_add_noise, FLAGS.noise_ratio, FLAGS.data_dir,
    #                                                        filename_train, FLAGS.sequence_window, trigger_flag,is_binary_class,
    #                                                        multiScale=FLAGS.is_multi_scale, waveScale=FLAGS.scale_levels,
    #                                                        waveType=FLAGS.wave_type)
    #x_test, y_test = loaddata.get_testData(FLAGS.pooling_type, FLAGS.is_add_noise, FLAGS.noise_ratio, FLAGS.data_dir,
    #                                       filename_test, FLAGS.sequence_window, trigger_flag,is_binary_class,
    #                                        multiScale=FLAGS.is_multi_scale, waveScale=FLAGS.scale_levels,
    #                                        waveType=FLAGS.wave_type)

    """
    #loaddata.Multi_Scale_Plotting_2(x_train)

    # Derive network dimensions from the data shapes.  batch_size is set to
    # the full training set (one fifth of it for Nimda), i.e. one batch per
    # epoch.
    if FLAGS.is_multi_scale:
        FLAGS.scale_levels = x_train.shape[1]
        FLAGS.input_dim = x_train.shape[-1]
        FLAGS.number_class = y_train.shape[1]
        if "Nimda" in filename_test:
            FLAGS.batch_size = int(int(x_train.shape[0]) / 5)
        else:
            FLAGS.batch_size = int(x_train.shape[0])
    else:
        FLAGS.input_dim = x_train.shape[-1]
        FLAGS.number_class = y_train.shape[1]
        if "Nimda" in filename_test:
            FLAGS.batch_size = int(int(x_train.shape[0]) / 5)
        else:
            FLAGS.batch_size = int(x_train.shape[0])
    #g = tf.Graph()
    with tf.Graph().as_default():
        #config = tf.ConfigProto()
        # NOTE(review): ConfigProto's device_count expects the key 'GPU',
        # not '/gpu' — as written this may not actually disable the GPU;
        # verify.
        config = tf.ConfigProto(device_count={'/gpu': 0})  #turn GPU on and off
        #config = tf.ConfigProto(log_device_placement=True)
        #config.gpu_options.per_process_gpu_memory_fraction = 0.2
        #with tf.variable_scope("middle")as scope:
        tf.set_random_seed(1337)
        #global_step = tf.Variable(0,name="global_step",trainable=False)
        data_x, data_y = mslstm.inputs(FLAGS.option)
        #output_u_w,prediction, label = mslstm.inference(data_x,data_y,FLAGS.option)

        is_training = tf.placeholder(tf.bool)
        prediction, label, output_last = mslstm.inference(
            data_x, data_y, FLAGS.option, is_training)
        loss = mslstm.loss_(prediction, label)
        tran_op, optimizer = mslstm.train(loss)
        minimize = optimizer.minimize(loss)
        correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(label, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        #summary_op = tf.merge_all_summaries()
        # This variable only exists so the Saver below has a named tensor
        # to checkpoint.
        weights = tf.Variable(tf.constant(
            0.1,
            shape=[len(y_test) * FLAGS.sequence_window, 1,
                   FLAGS.scale_levels]),
                              name="weights123")
        init_op = tf.global_variables_initializer()
        #init_op = tf.initialize_all_variables()
        sess = tf.Session(config=config)
        sess.run(init_op)

        #summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph)
        #saver = tf.train.Saver()
        saver = tf.train.Saver({"my_weights": weights})

        epoch_training_loss_list = []
        epoch_training_acc_list = []
        epoch_val_loss_list = []
        epoch_val_acc_list = []
        early_stopping = 10
        no_of_batches = int(len(x_train) / FLAGS.batch_size)
        #visualize.curve_plotting_withWindow(x_train, y_train, 0, "Train_"+'_'+FLAGS.option)
        #visualize.curve_plotting_withWindow(x_test, y_test, 2, "Test_"+'_'+FLAGS.option)
        total_iteration = 0
        for i in range(FLAGS.max_epochs):
            # early_stopping counts down on each epoch without val-accuracy
            # improvement (see below) and aborts training at zero.
            if early_stopping > 0:
                pass
            else:
                break
            j_iteration = 0
            for j_batch in iterate_minibatches(x_train,
                                               y_train,
                                               FLAGS.batch_size,
                                               shuffle=False):
                j_iteration += 1
                total_iteration += 1
                inp, out = j_batch
                sess.run(minimize, {
                    data_x: inp,
                    data_y: out,
                    is_training: True
                })
                training_acc, training_loss = sess.run((accuracy, loss), {
                    data_x: inp,
                    data_y: out,
                    is_training: True
                })
                #sys.stdout = tempstdout
                val_acc, val_loss = sess.run((accuracy, loss), {
                    data_x: x_val,
                    data_y: y_val,
                    is_training: True
                })
            pprint(
                FLAGS.option + "_Epoch%s" % (str(i + 1)) + ">" * 3 +'_Titer-'+str(total_iteration) +'_iter-'+str(j_iteration)+ str(FLAGS.wave_type) + '-' + str(FLAGS.scale_levels) + '-' + str(FLAGS.learning_rate)+'-'+str(FLAGS.num_neurons1)+'-'+str(FLAGS.num_neurons2)+ ">>>=" + "train_accuracy: %s, train_loss: %s" % (
                str(training_acc), str(training_loss)) \
                + ",\tval_accuracy: %s, val_loss: %s" % (str(val_acc), str(val_loss)), method)

            epoch_training_loss_list.append(training_loss)
            epoch_training_acc_list.append(training_acc)
            epoch_val_loss_list.append(val_loss)
            epoch_val_acc_list.append(val_acc)

            try:
                max_val_acc = epoch_val_acc_list[-2]
            except:
                max_val_acc = 0

            if epoch_val_acc_list[-1] < max_val_acc:
                early_stopping -= 1
            elif epoch_val_acc_list[-1] >= max_val_acc:
                early_stopping = 10
            # NOTE(review): `val_loss == np.nan` is always False because
            # NaN != NaN; use np.isnan(val_loss) for the intended check.
            if val_loss > 10 or val_loss == np.nan:
                break
        # Disabled branch (1 < 0 is always False): attention-weight
        # visualization/export kept for reference.
        if 1 < 0:
            #pprint("PPP")
            weights_results = sess.run(output_last, {
                data_x: x_test,
                data_y: y_test
            })
            #print(weights_results)
            #sys.stdout = tempstdout
            visualize.curve_plotting(weights_results, y_test, filename_test,
                                     FLAGS.option)
            #pprint("QQQ")
            with open(filename_test + "_EA.txt", 'w') as fout:
                fout.write(weights_results)
            #sess.run(weights.assign(weights_results))
        else:
            pass

        #weights = output_u_w.eval(session=sess)
        #weights = saver.restore(sess, "./tf_tmp/model.ckpt")
        #pprint(weights)
        #weight_list = return_max_index(weights)
        result = sess.run(prediction, {data_x: x_test, data_y: y_test})
        #print(result)
        #pprint(result)
        #print("LLL")
    saver.save(sess, "./tf_tmp/model.ckpt")
    sess.close()
    #results = evaluation.evaluation(y_test, result)#Computing ACCURACY, F1-Score, .., etc
    if is_binary_class == True:
        #sys.stdout = tempstdout
        results = evaluation.evaluation(
            y_test, result, trigger_flag,
            evalua_flag)  # Computing ACCURACY,F1-score,..,etc
        y_test = loaddata.reverse_one_hot(y_test)
        result = loaddata.reverse_one_hot(result)
    else:
        # Multi-class path: per-class accuracy on class 0, then macro F1.
        symbol_list = [0, 1, 2, 3, 4]
        sys.stdout = tempstdout
        print(y_test)
        print(result)
        y_test = loaddata.reverse_one_hot(y_test)
        result = loaddata.reverse_one_hot(result)

        confmat = confusion_matrix(y_test, result, labels=symbol_list)
        visualize.plotConfusionMatrix(confmat)
        #accuracy = sklearn.metrics.accuracy_score(y_test, result)
        symbol_list2 = [0]
        y_ = []
        for symbol in symbol_list2:
            for tab in range(len(y_test)):
                if y_test[tab] == symbol and y_test[tab] == result[tab]:
                    y_.append(symbol)
            # print(y_test[0:10])
            # rint(result[0:10])
            # print("Accuracy is :"+str(accuracy))
            # NOTE(review): ZeroDivisionError if `result` contains no
            # predictions of `symbol`.
            accuracy = float(len(y_)) / (list(result).count(symbol))
            print("Accuracy of " + str(symbol) + " is :" + str(accuracy))
        print("True is ")
        # print(y_test)
        print("The 0 of True is " + str(list(y_test).count(0)))
        print("The 1 of True is " + str(list(y_test).count(1)))
        print("The 2 of True is " + str(list(y_test).count(2)))
        print("The 3 of True is " + str(list(y_test).count(3)))
        print("The 4 of True is " + str(list(y_test).count(4)))
        # print(len(y_test))
        print("Predict is ")
        # print(result)
        print("The 0 of Predict is " + str(list(result).count(0)))
        print("The 1 of Predict is " + str(list(result).count(1)))
        print("The 2 of Predict is " + str(list(result).count(2)))
        print("The 3 of Predict is " + str(list(result).count(3)))
        print("The 4 of Predict is " + str(list(result).count(4)))
        print("Accuracy is :" + str(accuracy))
        f1_score = sklearn.metrics.f1_score(y_test, result, average="macro")
        print("F-score is :" + str(f1_score))
        # AUC/G_MEAN are not computed on this path; 9999 is a sentinel.
        results = {
            'ACCURACY': accuracy,
            'F1_SCORE': f1_score,
            'AUC': 9999,
            'G_MEAN': 9999
        }
    sys.stdout = tempstdout
    #print(weights_results.shape)
    #print("215")
    y_test2 = np.array(y_test)
    result2 = np.array(result)
    #results = accuracy_score(y_test2, result2)
    #print(y_test2)
    #print(result2)
    #print(results)
    # Dump true vs. predicted labels for false-alarm statistics.
    with open(
            os.path.join(os.path.join(os.getcwd(), 'stat'),
                         "StatFalseAlarm_" + filename_test + "_True.txt"),
            "w") as fout:
        for tab in range(len(y_test2)):
            fout.write(str(int(y_test2[tab])) + '\n')
    with open(
            os.path.join(
                os.path.join(os.getcwd(), 'stat'), "StatFalseAlarm_" +
                filename_test + "_" + method + "_" + "_Predict.txt"),
            "w") as fout:
        for tab in range(len(result2)):
            fout.write(str(int(result2[tab])) + '\n')
    #eval_list = ["AUC", "G_MEAN","ACCURACY","F1_SCORE"]
    for each_eval in evaluation_list:
        result_list_dict[each_eval].append(results[each_eval])

    if evalua_flag:
        with open(
                os.path.join(FLAGS.output,
                             "TensorFlow_Log" + filename_test + ".txt"),
                "a") as fout:
            if not FLAGS.is_multi_scale:
                outfileline = FLAGS.option + "_epoch:" + str(
                    FLAGS.max_epochs) + ",_lr:" + str(
                        FLAGS.learning_rate) + ",_multi_scale:" + str(
                            FLAGS.is_multi_scale) + ",hidden_nodes: " + str(
                                FLAGS.num_neurons1) + "/" + str(
                                    FLAGS.num_neurons2) + "\n"
            else:
                outfileline = FLAGS.option + "_epoch:" + str(
                    FLAGS.max_epochs) + ",_wavelet:" + str(
                        FLAGS.wave_type) + ",_lr:" + str(
                            FLAGS.learning_rate) + ",_multi_scale:" + str(
                                FLAGS.is_multi_scale
                            ) + ",_train_set_using_level:" + str(
                                FLAGS.scale_levels) + ",hidden_nodes: " + str(
                                    FLAGS.num_neurons1) + "/" + str(
                                        FLAGS.num_neurons2) + "\n"

            fout.write(outfileline)
            for each_eval in evaluation_list:
                #for eachk, eachv in result_list_dict.items():
                fout.write(
                    each_eval + ": " +
                    str(round(np.mean(result_list_dict[each_eval]), 3)) +
                    ",\t")
            fout.write('\n')
        return epoch_training_acc_list, epoch_val_acc_list, epoch_training_loss_list, epoch_val_loss_list
    else:
        return results
예제 #7
0
File: run_rnn.py  Project: artemyk/ssc
    
    model.model_name = model_name
    return model

"""
def save_model(model):
    print
    print "Saving", model.model_name
    open('%s.json'%model.model_name, 'w').write(model.to_json())
    model.save_weights('%s_weights.h5' % model.model_name, overwrite=True)    
"""

if __name__ == "__main__":
    # Experiment configuration for the RNN sweep; the training loop that
    # consumes these settings is outside this excerpt.
    USE_TOP    = 1e8
    batch_size = 10
    trajs, trajs_trn, observable = get_data()
    from keras.callbacks import ModelCheckpoint
    
    NUM_EPOCHS=1000
    # Hidden-layer sizes to sweep.
    dims = [3,10,50,100,300,500,700,1000]
    # Regularization strengths to sweep.
    regs = [0,]
    #archs = ['rnn','rnn2','rnn_identity','rnn_init_time','rnn1']
    #archs = ['rnn3',]
    #archs = ['rnn4',]
    #archs = ['rnn4_stacked',]
    archs = ['rnn2',]
    #archs = ['rnn2',]; dims = [2,3,5,10]
    #archs = ['rnn2',]; dims = [50,100]; NUM_EPOCHS = 1000
    #archs = ['rnn2',]; dims = [2,3,5,10]
    #archs = ['rnn2_invstacked',]; dims = [3]; NUM_EPOCHS = 1000
    #regs = [0, 1e-3, 1e-2, 1e-1, 0.5, 1]; dims = [100,]
예제 #8
0
def Basemodel(_model, filename, trigger_flag, evalua_flag, is_binary_class,
              evaluation_list):
    """Train and evaluate one baseline classifier (MLP, RNN or LSTM).

    Data are loaded through the project-local ``loaddata`` module, the
    selected Keras model is trained, and test-split predictions are
    scored.  Per-sample true/predicted labels are also dumped under
    ``./stat/`` for false-alarm statistics.

    Parameters:
        _model: one of "MLP", "RNN", "LSTM" — selects the architecture.
        filename: dataset file name; also embedded in log/stat file names.
        trigger_flag: forwarded to the data loaders and the evaluator.
        evalua_flag: truthy -> append averaged metrics to the comparison
            log and return None; falsy -> return the metrics dict.
        is_binary_class: truthy -> score via ``evaluation.evaluation``;
            falsy -> compute accuracy/macro-F1 on de-one-hot labels here.
        evaluation_list: metric names to collect from the results dict.

    Returns:
        dict of metrics when ``evalua_flag`` is falsy, otherwise None.

    Raises:
        ValueError: if ``_model`` is not one of the supported names
            (previously this fell through to a NameError on ``result``).
    """
    if _model not in ("MLP", "RNN", "LSTM"):
        raise ValueError("unknown model: " + str(_model))

    # One score list per requested metric; defaultdict makes the explicit
    # pre-seeding loop of the original redundant.
    result_list_dict = defaultdict(list)
    # Per-dataset feature counts kept for reference (unused below):
    # num_selected_features = 25#AS leak tab=0
    # num_selected_features = 32#Slammer tab=0
    num_selected_features = 33  # Nimda tab=1
    x_train, y_train, x_val, y_val, x_test, y_test = loaddata.get_data_withoutS(
        FLAGS.pooling_type,
        FLAGS.is_add_noise,
        FLAGS.noise_ratio,
        FLAGS.data_dir,
        filename,
        FLAGS.sequence_window,
        trigger_flag,
        is_binary_class,
        multiScale=False,
        waveScale=FLAGS.scale_levels,
        waveType=FLAGS.wave_type)

    print("DDD")
    print(FLAGS.sequence_window)
    # Derive network input/output sizes from the loaded arrays.
    FLAGS.input_dim = x_train.shape[-1]
    FLAGS.number_class = y_train.shape[1]

    # using MLP to train
    if _model == "MLP":
        pprint(_model +
               " is running..............................................")
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for interval timing.
        start = time.perf_counter()
        model = Sequential()
        model.add(
            Dense(FLAGS.num_neurons1,
                  activation="sigmoid",
                  input_dim=FLAGS.input_dim))
        # NOTE: output_dim/nb_epoch are legacy Keras-1 keyword names, kept
        # because the rest of this file targets that API.
        model.add(Dense(output_dim=FLAGS.number_class))
        model.add(Activation("softmax"))
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        model.fit(x_train,
                  y_train,
                  validation_data=(x_val, y_val),
                  batch_size=FLAGS.batch_size,
                  nb_epoch=FLAGS.max_epochs)
        print("AOA")
        result = model.predict(x_test)
        print(result)
        end = time.perf_counter()
        pprint("The Time For MLP is " + str(end - start))
        pprint(x_test.shape)

    elif _model == "RNN":
        pprint(_model +
               " is running..............................................")
        start = time.perf_counter()
        # Reload with the sequence-aware loader (3-D inputs for the RNN).
        x_train, y_train, x_val, y_val, x_test, y_test = loaddata.get_data(
            FLAGS.pooling_type,
            FLAGS.is_add_noise,
            FLAGS.noise_ratio,
            FLAGS.data_dir,
            filename,
            FLAGS.sequence_window,
            trigger_flag,
            is_binary_class,
            multiScale=False,
            waveScale=FLAGS.scale_levels,
            waveType=FLAGS.wave_type)

        rnn_object1 = SimpleRNN(FLAGS.num_neurons1,
                                input_length=len(x_train[0]),
                                input_dim=FLAGS.input_dim)
        model = Sequential()
        model.add(rnn_object1)  # X.shape is (samples, timesteps, dimension)
        model.add(Dense(output_dim=FLAGS.number_class))
        model.add(Activation("sigmoid"))
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        model.fit(x_train,
                  y_train,
                  validation_data=(x_val, y_val),
                  batch_size=FLAGS.batch_size,
                  nb_epoch=FLAGS.max_epochs)
        result = model.predict(x_test)
        end = time.perf_counter()
        pprint("The Time For RNN is " + str(end - start))

    else:  # _model == "LSTM"
        pprint(_model +
               " is running..............................................")
        start = time.perf_counter()
        x_train, y_train, x_val, y_val, x_test, y_test = loaddata.get_data(
            FLAGS.pooling_type,
            FLAGS.is_add_noise,
            FLAGS.noise_ratio,
            FLAGS.data_dir,
            filename,
            FLAGS.sequence_window,
            trigger_flag,
            is_binary_class,
            multiScale=False,
            waveScale=FLAGS.scale_levels,
            waveType=FLAGS.wave_type)

        initi_weight = keras.initializers.RandomNormal(mean=0.0,
                                                       stddev=1,
                                                       seed=None)
        initi_bias = keras.initializers.Constant(value=0.1)
        # FIX: initializers belong on the LSTM layer itself.  The original
        # passed them to Sequential.add(), which accepts no keyword
        # arguments and raised a TypeError.
        lstm_object = LSTM(FLAGS.num_neurons1,
                           input_length=x_train.shape[1],
                           input_dim=FLAGS.input_dim,
                           kernel_initializer=initi_weight,
                           bias_initializer=initi_bias)
        model = Sequential()
        model.add(lstm_object)  # X.shape is (samples, timesteps, dimension)
        model.add(Dense(output_dim=FLAGS.number_class))
        model.add(Activation("sigmoid"))
        sgd = keras.optimizers.SGD(lr=0.02,
                                   momentum=0.0,
                                   decay=0.0,
                                   nesterov=False)
        model.compile(optimizer=sgd,
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        model.fit(x_train,
                  y_train,
                  validation_data=(x_val, y_val),
                  batch_size=FLAGS.batch_size,
                  nb_epoch=FLAGS.max_epochs)
        result = model.predict(x_test)
        end = time.perf_counter()
        pprint("The Time For LSTM is " + str(end - start))

    if is_binary_class:
        results = evaluation.evaluation(
            y_test, result, trigger_flag,
            evalua_flag)  # Computing ACCURACY,F1-score,..,etc
    else:
        # Multi-class: convert one-hot back to label indices and score here.
        y_test = loaddata.reverse_one_hot(y_test)
        result = loaddata.reverse_one_hot(result)

        accuracy = sklearn.metrics.accuracy_score(y_test, result)
        f1_score = sklearn.metrics.f1_score(y_test, result, average="macro")

        # Convert once, count per class (the original rebuilt the list for
        # every count).  Output lines are byte-identical to the original.
        y_true_list = list(y_test)
        y_pred_list = list(result)
        print("True is ")
        for label in range(5):
            print("The " + str(label) + " of True is " +
                  str(y_true_list.count(label)))
        print("Predict is ")
        for label in range(5):
            print("The " + str(label) + " of Predict is " +
                  str(y_pred_list.count(label)))
        print("Accuracy of " + _model + "is :" + str(accuracy))
        print("F-score of " + _model + "is :" + str(f1_score))

        # 9999 marks metrics not computed in multi-class mode.
        results = {
            'ACCURACY': accuracy,
            'F1_SCORE': 9999,
            'AUC': 9999,
            'G_MEAN': 9999
        }

    # De-one-hot for the per-sample dumps; the multi-class path above has
    # already converted, in which case reverse_one_hot fails and the
    # arrays are used as-is.
    try:
        y_test2 = np.array(loaddata.reverse_one_hot(y_test))
        result2 = np.array(loaddata.reverse_one_hot(result))
    except Exception:
        y_test2 = y_test
        result2 = result
    # Statistics False Alarm Rate
    with open(os.path.join("./stat/", "StatFalseAlarm_" + filename + "_True"),
              "w") as fout:
        for tab in range(len(y_test2)):
            fout.write(str(int(y_test2[tab])) + '\n')
    # NOTE(review): the double underscore before "Predict" reproduces the
    # original file name exactly; downstream readers may depend on it.
    with open(
            os.path.join(
                "./stat/", "StatFalseAlarm_" + filename + "_" + _model + "_" +
                "_Predict"), "w") as fout:
        for tab in range(len(result2)):
            fout.write(str(int(result2[tab])) + '\n')

    for each_eval in evaluation_list:
        result_list_dict[each_eval].append(results[each_eval])
    if evalua_flag:
        # Log-only mode: append one line of averaged metrics, return None.
        with open(os.path.join(FLAGS.output, "Comparison_Log_" + filename),
                  "a") as fout:
            outfileline = _model + '_' + str(FLAGS.num_neurons1) + ":__"
            fout.write(outfileline)
            for each_eval in evaluation_list:
                fout.write(
                    each_eval + ": " +
                    str(round(np.average(result_list_dict[each_eval]), 3)) +
                    ",\t")
            fout.write('\n')
    else:
        return results