#assumed module-level dependencies (imported elsewhere in this file): time,
#dpkt, numpy, PyML's VectorDataSet and SVM, and the local Flow_table,
#Pkt_len_filter and Flow_pca helpers.
def perform_classification(service_id, settings, SUT, options):
    max_levels = settings["max_levels"]
    level_steps = settings["level_steps"]
    PCA = [None for x in range(0, max_levels)]
    #session tables, for known and unknown flows
    known_flow_table = Flow_table(max_levels)
    unknown_flow_table = Flow_table(max_levels)
    #the packet filter, used for feature extraction on a packet
    pkt_filter = Pkt_len_filter()
    ts = time.time()
    print 'start time :%f' % ts

    print "====================Processing PCAP=============================="
    #PCAP file processing
    for pcap_file_key in settings["apps"]:
        #if we don't need to learn the parameters, read only the unknown PCAP
        #file
        if (pcap_file_key != "unknown" and options.learn == False):
            print "already have parameters for service %s, not reading PCAP"\
                  " file" % (pcap_file_key)
            continue
        if (pcap_file_key == "unknown"):
            pkt_flow_table = unknown_flow_table
        else:
            pkt_flow_table = known_flow_table
        #read every PCAP sample recorded for this service
        for idx in range(0, len(settings["apps"][pcap_file_key])):
            pcap_file = open(settings["apps"][pcap_file_key][idx], "rb")
            print "Processing PCAP file for service %s:%s" % (pcap_file_key,
                settings["apps"][pcap_file_key][idx])
            for tx, pkt in dpkt.pcap.Reader(pcap_file):
                #process the packet, generate coefficients and update the flow
                #table
                pkt_flow_table.process_pkt(pkt=pkt, pkt_filter=pkt_filter,
                                           service=pcap_file_key,
                                           sample_idx=idx,
                                           max_levels=max_levels)
            pcap_file.close()
    print "====================Processing PCAP done=============================="

    print "====================Creating PCA objects (Supervised)=============================="
    #initialize the PCA objects for the known (labelled) flows
    if (options.learn == True):
        print "we need to learn/re-learn the theta parameters"
        for detail_coeff in level_steps:
            PCA[detail_coeff] = Flow_pca(known_flow_table, service_id, SUT,
                                         str(detail_coeff))
            PCA[detail_coeff].normalize_and_scale()
    print "====================PCA object creation done (Supervised)=============================="

    #generate SVM parameters for each of the different services.
    #these lists are indexed by detail-coefficient level, like PCA above.
    rewrite_param_json = False
    svm_data = [None] * max_levels
    svm_classifier = [None] * max_levels
    if (options.learn == True):
        print "====================Creating SVM objects=============================="
        for detail_coeff in level_steps:
            svm_data[detail_coeff] = VectorDataSet(PCA[detail_coeff].X,
                                                   L=PCA[detail_coeff].L)
            print svm_data[detail_coeff]
            print "Number of features:%d" % (svm_data[detail_coeff].numFeatures)
            #use a gaussian kernel on this data
            svm_data[detail_coeff].attachKernel('gaussian', gamma=4)
            svm_classifier[detail_coeff] = SVM(C=1000)
            svm_classifier[detail_coeff].train(svm_data[detail_coeff])
            #look at the cross-validation results on the training data
            print "==========performing cross validation========"
            svm_result = svm_classifier[detail_coeff].cv(svm_data[detail_coeff])
            print "Success Rate:%f" % (svm_result.getSuccessRate(0))
        print "====================SVM objects created=============================="
    else:
        print "skipping writing of the service parameters into the json file"

    #create the PCA objects for the unknown samples
    print "====================Creating PCA objects (unknown)=============================="
    PCA_unknown = [None for x in range(0, max_levels)]
    for detail_coeff in level_steps:
        PCA_unknown[detail_coeff] = Flow_pca(unknown_flow_table, service_id, SUT,
                                             coeffs_idx=str(detail_coeff))
        print "normalizing unknown matrix for level %d, number of unknown matrices %d" % \
            (detail_coeff, len(PCA_unknown))
        PCA_unknown[detail_coeff].normalize_and_scale()
        #make sure the number of features in PCA_unknown matches the number of
        #features in the supervised PCA object: read the feature count learned
        #from the known flows and truncate or zero-pad the unknown matrix to match
        PCA_features = PCA[detail_coeff].features
        print "before shape:", PCA_unknown[detail_coeff].X.shape, " features:", PCA_features
        if (PCA_unknown[detail_coeff].X.shape[1] > PCA_features):
            PCA_unknown[detail_coeff].X = PCA_unknown[detail_coeff].X[:, :PCA_features]
        elif (PCA_unknown[detail_coeff].X.shape[1] < PCA_features):
            #add more columns
            ext_array = numpy.zeros([PCA_unknown[detail_coeff].X.shape[0],
                                     PCA_features - PCA_unknown[detail_coeff].X.shape[1]],
                                    numpy.float)
            print "shape of extension array:", ext_array.shape
            PCA_unknown[detail_coeff].X = numpy.append(PCA_unknown[detail_coeff].X,
                                                       ext_array, axis=1)
        print "after shape:", PCA_unknown[detail_coeff].X.shape
        #reduce the unknown matrix with the projection learned from the known flows
        PCA_unknown[detail_coeff].X = PCA[detail_coeff].reduceVector(PCA_unknown[detail_coeff].X)
        print "reduced shape of unknown matrix is", PCA_unknown[detail_coeff].X.shape
    print "====================PCA objects created (unknown)=============================="

    print "====================Performing SVM on unknown flows ======================="
    #run the trained SVM classifier for each level on the unknown flow entries
    svm_unknown_data = [None] * max_levels
    predicted_labels = [None] * max_levels
    for detail_coeff in level_steps:
        svm_unknown_data[detail_coeff] = VectorDataSet(PCA_unknown[detail_coeff].X)
        svm_result = svm_classifier[detail_coeff].test(svm_unknown_data[detail_coeff])
        predicted_labels[detail_coeff] = svm_result.getPredictedLabels()
        print predicted_labels[detail_coeff]
    print "====================SVM performed (unknown)=============================="

    print "===================Analyzing results ==================================="
    out_file = open('success_rate.log', 'a')
    for detail_coeff in level_steps:
        predicted = 0.0
        unpredicted = 0.0
        out_str = "level:" + str(detail_coeff) + "\n"
        out_file.write(out_str)
        for i in range(0, len(predicted_labels[detail_coeff])):
            print "%s: %s" % (predicted_labels[detail_coeff][i],
                              PCA_unknown[detail_coeff].Keys[i])
            if (predicted_labels[detail_coeff][i] == SUT):
                predicted += 1
            else:
                unpredicted += 1
        print "Success rate:%f, predicted:%f, unpredicted:%f" % \
            (predicted / (predicted + unpredicted), predicted, unpredicted)
        out_str = settings["apps"]["unknown"][0] + ":" + \
                  str(predicted / (predicted + unpredicted)) + "\n"
        out_file.write(out_str)
    out_file.close()

    te = time.time()
    print 'Total time taken = %f sec' % (te - ts)
print "====================Creating PCA objects (unknown)==============================" PCA_unknown = [None for x in range(0,max_levels)] for detail_coeff in level_steps: PCA_unknown[detail_coeff] = Flow_pca(unknown_flow_table, service_id, SUT, coeffs_idx=str(detail_coeff)) print "normalizing unknown matrix for level %d, number of unknown matrices %d" % (detail_coeff, len(PCA_unknown)) PCA_unknown[detail_coeff].normalize_and_scale() #make sure the number of features in the PCA_unknown matches the number of #features PCA. to make this possible, we get the number of features in each #of the logisitic regression objects and get the number of features in these #objects PCA_features = PCA[detail_coeff].features print "before shape:",PCA_unknown[detail_coeff].X.shape," features:", PCA_features if (PCA_unknown[detail_coeff].X.shape[1] > (PCA_features)): PCA_unknown[detail_coeff].X = PCA_unknown[detail_coeff].X[:,:PCA_features] elif(PCA_unknown[detail_coeff].X.shape[1] < PCA_features): #add more columns ext_array = numpy.zeros([PCA_unknown[detail_coeff].X.shape[0],\ PCA_features-PCA_unknown[detail_coeff].X.shape[1]], numpy.float) print "shape of extension array:",ext_array.shape PCA_unknown[detail_coeff].X = numpy.append(PCA_unknown[detail_coeff].X, ext_array, axis = 1) print "after shape:",PCA_unknown[detail_coeff].X.shape #reduce the unknown matrix PCA_unknown[detail_coeff].X = PCA[detail_coeff].reduceVector(PCA_unknown[detail_coeff].X) print "reduced shape of unknown matrix is", PCA_unknown[detail_coeff].X.shape print "====================PCA objects created (unknown)=============================="