Python LstmAutoEncoder.fit 예제들, keras_anomaly_detection.library.recurrent.LstmAutoEncoder.fit Python 예제들

예제 #1

0

파일 보기

파일: anomaly_detect.py 프로젝트: ShirleyHan6/Hack4ClimateOracle

def main():
    """Fit an LSTM auto-encoder on the ground-sensor CSV and list anomalies.

    Reads ``./data/ground_anomaly.csv``, discards the first data row and the
    columns named in the ``drop`` call, min-max scales the remaining columns,
    fits the auto-encoder on the first 10000 rows (saving the model under
    ``./models``), reloads the saved model and prints the index of every row
    it flags as anomalous. Finally the per-row distances are plotted against
    the model threshold.
    """
    data_dir_path = './data'
    model_dir_path = './models'

    ecg_data = pd.read_csv(data_dir_path + '/ground_anomaly.csv')
    # Skip the first data row of the file.
    ecg_data = ecg_data[1:]
    ecg_data = ecg_data.drop(
        ['TIMESTAMP', 'RECORD', 'AmbTemp_C_Avg', 'InvPAC_kW_Avg',
         'PwrMtrP_kW_Avg'],
        axis=1)

    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray and works on both old and new pandas releases.
    ecg_np_data = ecg_data.values
    scaler = MinMaxScaler()
    ecg_np_data = scaler.fit_transform(ecg_np_data)

    ae = LstmAutoEncoder()
    print(ecg_data.shape)
    # shape[0] is the number of rows left after the drop above.
    row_count = ecg_data.shape[0]
    print(row_count)

    # The same leading window is used for fitting and for detection.
    train_window = ecg_np_data[:10000, :]

    # fit the data and save model into model_dir_path
    ae.fit(train_window, model_dir_path=model_dir_path,
           estimated_negative_sample_ratio=0.95)

    # load back the model saved in model_dir_path detect anomaly
    ae.load_model(model_dir_path)
    anomaly_information = ae.anomaly(train_window)

    reconstruction_error = []
    idx_list = []
    for idx, (is_anomaly, dist) in enumerate(anomaly_information):
        if is_anomaly:
            print(idx)
            idx_list.append(idx)
            print('# ' + str(idx) + ' is abnormal.')
        reconstruction_error.append(dist)

    # One index is recorded per anomaly, so the list length is the count.
    abnormal_number = len(idx_list)
    print(abnormal_number)
    print(idx_list)
    visualize_reconstruction_error(reconstruction_error, ae.threshold)

예제 #2

0

파일 보기

def main():
    """Run the ECG anomaly-detection demo with an LSTM auto-encoder.

    Loads ``./data/ecg_discord_test.csv`` (no header row), min-max scales it,
    optionally fits the auto-encoder on the first 23 rows when the
    module-level ``DO_TRAINING`` flag is truthy, then reloads the model from
    ``./models`` and prints a normal/abnormal verdict with its distance for
    each of those 23 rows before plotting the distances against the model
    threshold.
    """
    data_dir_path = './data'
    model_dir_path = './models'

    ecg_data = pd.read_csv(data_dir_path + '/ecg_discord_test.csv',
                           header=None)
    print(ecg_data.head())

    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray and works on both old and new pandas releases.
    ecg_np_data = ecg_data.values
    scaler = MinMaxScaler()
    ecg_np_data = scaler.fit_transform(ecg_np_data)
    print(ecg_np_data.shape)

    ae = LstmAutoEncoder()

    # The same leading window is used for fitting and for detection.
    sample_window = ecg_np_data[:23, :]

    # fit the data and save model into model_dir_path
    if DO_TRAINING:
        ae.fit(sample_window,
               model_dir_path=model_dir_path,
               estimated_negative_sample_ratio=0.9)

    # load back the model saved in model_dir_path detect anomaly
    ae.load_model(model_dir_path)
    anomaly_information = ae.anomaly(sample_window)

    reconstruction_error = []
    for idx, (is_anomaly, dist) in enumerate(anomaly_information):
        verdict = 'abnormal' if is_anomaly else 'normal'
        print('# ' + str(idx) + ' is ' + verdict + ' (dist: ' +
              str(dist) + ')')
        reconstruction_error.append(dist)

    visualize_reconstruction_error(reconstruction_error, ae.threshold)

예제 #3

0

파일 보기

파일: dl_LstmAutoEncoderVec.py 프로젝트: shariful-cu/deeplearning

def main():
    """Fit an LSTM auto-encoder on ADFA-LD tf-idf rows and score all rows.

    Reads ``train_normal.csv`` and ``test_attack.csv`` (skipping each file's
    first line), drops the last column of both, stacks them, min-max scales
    the result, fits the auto-encoder on rows 0..831 and then scores every
    row with an explicit threshold of 1.75. A normal/abnormal verdict is
    printed per row and the distances are plotted.
    """
    data_dir_path = '/Users/Shariful/Documents/DataCamp/ADFA-LD(tf-idf)'
    model_dir_path = '/Users/Shariful/Documents/GitHubRepo/Datasets/adfa_demo/models'

    ecg_data2 = pd.read_csv(data_dir_path + '/train_normal.csv', skiprows=1,
                            index_col=None, header=None)
    ecg_data3 = pd.read_csv(data_dir_path + '/test_attack.csv', skiprows=1,
                            index_col=None, header=None)
    # Drop the trailing column of each frame (presumably a label column --
    # TODO confirm against the CSV layout).
    ecg_data2 = ecg_data2.iloc[:, 0:-1]
    ecg_data3 = ecg_data3.iloc[:, 0:-1]

    ecg_data = pd.concat([ecg_data2, ecg_data3], ignore_index=True)

    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray and works on both old and new pandas releases.
    ecg_np_data = ecg_data.values
    # NOTE(review): the scaler is fitted on the normal and attack rows
    # together, so the scaling seen during training depends on the test data.
    scaler = MinMaxScaler()
    ecg_np_data = scaler.fit_transform(ecg_np_data)
    print(ecg_np_data.shape)

    ae = LstmAutoEncoder()

    # fit the data and save model into model_dir_path
    # (rows 0..831 -- presumably exactly the train_normal rows; TODO confirm)
    ae.fit(ecg_np_data[0:832, :], model_dir_path=model_dir_path,
           estimated_negative_sample_ratio=0.9)

    # load back the model saved in model_dir_path detect anomaly
    ae.load_model(model_dir_path)
    anomaly_information = ae.anomaly(ecg_np_data, threshold=1.75)

    reconstruction_error = []
    for idx, (is_anomaly, dist) in enumerate(anomaly_information):
        verdict = 'abnormal' if is_anomaly else 'normal'
        print('# ' + str(idx) + ' is ' + verdict + ' (dist: ' + str(dist) + ')')
        reconstruction_error.append(dist)

    visualize_reconstruction_error(reconstruction_error, ae.threshold)

예제 #4

0

파일 보기

파일: trafficapi.py 프로젝트: bnanita/trafficapi

def lstmnn(inputfile, weekday, lanedirection, hourfrom, hourto):
    """Flag traffic-volume outliers with an LSTM auto-encoder and render them.

    The CSV is filtered by week type, lane direction and hour range, the
    ``Volume`` column is min-max scaled, an auto-encoder is fitted on it and
    then used to label every row. The labels are compared with the ``Outlier``
    column to compute balanced accuracy.

    Args:
        inputfile: Name of the CSV file inside ``./datalake``.
        weekday: Value the ``Week`` column must equal.
        lanedirection: Value the ``LaneDirection`` column must equal.
        hourfrom: Smallest ``Hour`` value to keep (inclusive).
        hourto: Largest ``Hour`` value to keep (inclusive).

    Returns:
        The result of rendering ``home.html`` with the distance plot as an
        inline base64 PNG ``<img>`` tag, the balanced-accuracy text, the rows
        predicted as outliers (HTML table) and the elapsed seconds measured
        up to just before plotting.
    """
    begin = time.perf_counter()
    data_dir_path = './datalake'
    model_dir_path = './models'

    # NOTE(review): inputfile is joined into the path unchecked; if it can
    # come from a request, confirm it cannot escape data_dir_path.
    df = pd.read_csv(data_dir_path + '/' + inputfile)

    selected = ((df['Week'] == weekday)
                & (df['LaneDirection'] == lanedirection)
                & (df['Hour'] >= hourfrom) & (df['Hour'] <= hourto))
    dat = df.loc[selected]
    # Positional row number within the filtered subset.
    dat.insert(0, 'Row', range(0, 0 + len(dat)))
    # Explicit copy: a column is added to this frame further down, and
    # assigning onto a derived frame triggers pandas' chained-assignment
    # warning.
    dat = dat[[
        'Row', 'Sdate', 'DayName', 'LaneNumber', 'DirectionDescription',
        'Volume', 'AvgSpeed', 'Outlier'
    ]].copy()
    print(dat)

    traffic_data = dat[['Volume']]
    print(traffic_data.head())
    traffic_np_data = traffic_data.values
    scaler = MinMaxScaler()
    traffic_np_data = scaler.fit_transform(traffic_np_data)
    print(traffic_np_data.shape)

    ae = LstmAutoEncoder()

    # fit the data and save model into model_dir_path
    ae.fit(traffic_np_data[:, :],
           model_dir_path=model_dir_path,
           estimated_negative_sample_ratio=0.9)

    # load back the model saved in model_dir_path detect anomaly
    ae.load_model(model_dir_path)
    anomaly_information = ae.anomaly(traffic_np_data)

    # Collect verdicts in plain lists; appending rows to a DataFrame one at a
    # time re-allocates on every step and is quadratic in the row count.
    reconstruction_error = []
    predicted_flags = []
    for idx, (is_anomaly, dist) in enumerate(anomaly_information):
        print('# ' + str(idx) + ' is ' +
              ('abnormal' if is_anomaly else 'normal') + ' (dist: ' +
              str(dist) + ')')
        predicted_flags.append(1 if is_anomaly else 0)
        reconstruction_error.append(dist)

    # Assigned by position: one flag per filtered row, in row order.
    dat['OutlierPrediction'] = np.asarray(predicted_flags, dtype=int)
    df3 = dat.loc[dat['OutlierPrediction'] == 1]
    print(df3)

    tn, fp, fn, tp = confusion_matrix(dat['Outlier'].values,
                                      dat['OutlierPrediction'].values).ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (fp + tn)
    BalancedAccuracy = (sensitivity + specificity) / 2
    end = time.perf_counter() - begin
    print('Balanced Accuracy=%.2f' % (BalancedAccuracy))

    # Plot on a dedicated figure and always close it. Drawing on pyplot's
    # implicit global figure would keep it alive between calls, so later
    # calls would draw on top of earlier plots and leak memory.
    img = io.BytesIO()
    fig = pyplot.figure()
    try:
        pyplot.plot(reconstruction_error,
                    marker='o',
                    ms=3.5,
                    linestyle='',
                    label='Point')
        pyplot.hlines(ae.threshold,
                      xmin=0,
                      xmax=len(reconstruction_error) - 1,
                      colors="r",
                      zorder=100,
                      label='Threshold')
        pyplot.legend()
        pyplot.ylabel("Dist")
        pyplot.xlabel("Data point index")
        pyplot.savefig(img, format='png')
    finally:
        pyplot.close(fig)
    img.seek(0)

    # Embed the PNG directly in the page as a data URI.
    plot_url = base64.b64encode(img.getvalue()).decode()
    rsp = '<img src="data:image/png;base64,{}">'.format(plot_url)

    return render_template("home.html",
                           graph=rsp,
                           data="Balanced Accuracy = " +
                           str(round(BalancedAccuracy, 2)),
                           data3=df3.to_html(),
                           data2=round(end, 2))
예제 #5

0

파일 보기

파일: LstmAutoEncoderCanali.py 프로젝트: shariful-cu/deeplearning

def main():
    """Train an LSTM auto-encoder on the Canali set and score the test traces.

    Steps:
      1. Read ``train_set.csv`` and fit the auto-encoder on it unscaled
         (batch size 1000, 20 epochs), saving the model to ``model_dir_path``.
      2. Reload the model and score every row of ``test_set.csv`` with an
         explicit threshold of 150.
      3. Reduce the per-row distances to one maximum per row of
         ``test_set_index_range_label.csv``, using that file's column 1 as
         the last row index belonging to each entry (presumably one entry per
         trace -- TODO confirm against the data preparation script).
      4. Plot the maxima, draw the ROC curve against the labels taken from
         the index file's last column, and save the maxima as CSV.
    """
    # ---------------- paths ----------------
    data_dir_path = (r'/Users/Shariful/Documents/SysCallDataset/PreparedData'
                     r'/Canali_dataset/sliding_window_5')
    model_dir_path = (r'/Users/Shariful/Documents/GitHubRepo/deeplearning/'
                      r'syscall_anomaly/Canali/trained_models')
    score_dir_path = (r'/Users/Shariful/Documents/GitHubRepo/deeplearning/'
                      r'syscall_anomaly/Canali/scores')

    # ---------------- fit the LSTM model ----------------
    canali_data = pd.read_csv(data_dir_path + '/train_set.csv', header=None)
    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray and works on both old and new pandas releases.
    canali_np_data = canali_data.values

    ae = LstmAutoEncoder()

    # fit the data and save model into model_dir_path
    ae.fit(canali_np_data, model_dir_path=model_dir_path, batch_size=1000,
           epochs=20, estimated_negative_sample_ratio=None)

    # ---------------- load the saved model ----------------
    ae.load_model(model_dir_path)

    # ---------------- read the test data ----------------
    test_idx_path = data_dir_path + '/test_set_index_range_label.csv'
    df_test_idx = pd.read_csv(test_idx_path, header=None)

    test_path = data_dir_path + '/test_set.csv'
    df_test = pd.read_csv(test_path, header=None)
    df_test_np = df_test.values

    test_labels = np.array(df_test_idx.iloc[:, -1])

    # ---------------- score the test set ----------------
    anomaly_information = ae.anomaly(df_test_np, threshold=150)

    # Column 1 of the index file, read once instead of doing a DataFrame row
    # lookup on every iteration of the loop below.
    range_ends = df_test_idx.iloc[:, 1].values

    idx_out = 0
    max_scores = np.zeros((df_test_idx.shape[0]))
    for idx_in, (_, dist) in enumerate(anomaly_information):
        # Keep the largest distance seen within the current index range.
        if idx_in <= range_ends[idx_out]:
            if max_scores[idx_out] < dist:
                max_scores[idx_out] = dist
        else:
            # Past the end of the current range: move to the next entry and
            # seed its maximum with this distance.
            idx_out += 1
            max_scores[idx_out] = dist

    visualize_reconstruction_error(max_scores, ae.threshold)

    # ---------------- evaluate and persist ----------------
    # draw the roc curve
    plot_ROC(test_labels, max_scores)

    # save the computed scores
    np.savetxt(score_dir_path + '/lstm_128_units.csv',
               max_scores,
               delimiter=",")