Python groundTruth 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: readGroundTruth

메소드/함수: groundTruth

hotexamples.com에서의 예제들: 3

Python groundTruth - 3개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한, Python readGroundTruth.groundTruth의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가하면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: 01_RunDenStream_AllCounters.py 프로젝트: FarRoss/NetCorDenStream

def main(configuration):
    """Stream every configured dataset/node pair through DenStream.

    For each dataset in ``configuration['dataset']['availableDataset']`` the
    ground truth file is read; for each node in ``configuration['nodes']``
    the node CSV is loaded, constant columns are removed, the data is
    normalized, the first ``configuration['sampleSkip']`` rows initialize
    the detector and the remaining rows are fed to it one at a time.

    When ``configuration['detectionCriterion']`` is ``'spatialDetection'``
    the per-sample result and time columns are written to a CSV file under
    ``Data/ResultsSpatialDetection/``.

    Returns:
        The tuple ``(aden, truth, df, times, dfNormalized, testDF)`` holding
        the objects of the last dataset/node that was processed.
    """
    totalExecutionTime = []

    for dataset in configuration['dataset']['availableDataset']:
        print(dataset + '\n')

        truthPath = 'GrounTruth/' + dataset + '.txt'
        truth = groundTruth(truthPath, fileType='csv')

        for node in configuration['nodes']:
            print('Dataset {} - Node: {} loading ...'.format(dataset, node))

            csvPath = configuration['dataset']['path'] + node + dataset + '.csv'
            df = pd.read_csv(csvPath, low_memory=False)
            df = df.dropna()
            df = df.drop('Unnamed: 0', axis=1)
            print('Done.')

            # Timestamps are kept aside so only feature columns get normalized.
            times = df['time'].astype('int')
            df = df.drop(['time'], axis=1)

            # Columns with zero standard deviation are removed first.
            varyingColumns = df.std() != 0
            df = df.loc[:, varyingColumns]
            dfNormalized = normalize_matrix(df).dropna(axis=1)

            # The first `skip` rows seed the detector, the rest are streamed.
            skip = configuration['sampleSkip']
            bufferDF = dfNormalized[0:skip]
            testDF = dfNormalized[skip:]

            # DenStream is configured from the configuration file parameters.
            params = configuration['denstreamParameters']
            aden = DenStream(
                lamb=params['lambda'],
                epsilon=params['epsilon'],
                beta=params['beta'],
                mu=params['mu'],
                startingBuffer=bufferDF,
                tp=params['tp'],
            )
            aden.runInitialization()

            print('Running algorithm ...')
            startingSimulation = time.time()
            # NOTE(review): `times` is indexed from 0 while `testDF` starts at
            # row `skip`, so samples are paired with the timestamps of the
            # buffer rows - confirm this offset is intended.
            outputCurrentNode = [
                aden.runOnNewSample(
                    Sample(testDF.iloc[position].values, times.iloc[position])
                )
                for position in range(len(testDF))
            ]
            endSimulation = time.time() - startingSimulation
            totalExecutionTime.append(endSimulation)
            print('Done in {}'.format(endSimulation))

            # Buffer rows were never evaluated, so they are recorded as False.
            df['result'] = [False] * skip + outputCurrentNode

            anomalyCount = outputCurrentNode.count(True)
            print("Number of anomalies in " + str(node) + " is: ", anomalyCount,
                  len(outputCurrentNode))

            if configuration['detectionCriterion'] == 'spatialDetection':
                df['time'] = times
                resultFrame = df[['result', 'time']]
                outputPath = ('Data/ResultsSpatialDetection/'
                              + configuration['featureModel'] + '/'
                              + dataset + '_DENSTREAM_' + node + '.csv')
                resultFrame.to_csv(outputPath, sep=',')
    return aden, truth, df, times, dfNormalized, testDF

예제 #2

파일 보기


for node in config["nodes"]:
    node = 'spine2'
    print(node)
    for dataset in config['dataset']['availableDataset']:
        dataset = 'portflap_first'
        print(dataset)
        table = PrettyTable()
        f_name = str(Path(__file__).parent
                     ) + '/Data/DatasetByNodes/' + node + dataset + '.csv'
        print(f_name)
        if os.path.isfile(f_name):
            df = pd.read_csv(f_name, low_memory=False).drop('Unnamed: 0',
                                                            axis=1)
            truth = groundTruth('GrounTruth/' + dataset + '.txt',
                                fileType='csv')
        print("Files are loaded")

        metric_lst = config['featureList']

        if len(truth.df.index[truth.df.Node == node].tolist()) >= 1:
            node_idx = truth.df.index[truth.df.Node == node].tolist()
            anomalyTime = pd.DataFrame(dtype=bool)

            if len(node_idx) == 1:
                anomalyTime = (df.time.astype('int64') >=
                               truth.events[node_idx[0]]['startTime'] - 50) & (
                                   df.time.astype('int64') <=
                                   truth.events[node_idx[0]]['endTime'] + 50)
            if len(node_idx) == 2:
                anomalyTime = (

예제 #3

파일 보기

def main(configuration):
    """Run DenStream anomaly detection on every configured dataset/node pair.

    For each dataset in ``configuration['dataset']['list']`` the ground truth
    file is read; for each node in ``configuration['nodes']`` the node CSV is
    loaded, reduced to the features selected by
    ``configuration['featureModel']``, normalized, and streamed sample by
    sample through a ``DenStream`` instance.

    ``configuration['detectionCriterion']`` selects the output:

    * ``'spatialDetection'``: the per-sample result and time columns of each
      node are written to a CSV under ``Data/ResultsSpatialDetection/``.
    * ``'timeDetection'``: per-node results are collected through
      ``Statistics`` and summarized after all nodes have been processed.

    When ``configuration['multicoreAnalysis']['ON']`` is ``'YES'`` the
    summaries are written as JSON files under ``Results/DataPlane/`` and the
    function returns ``None``.

    Returns:
        ``(aden, truth, df, times, dfNormalized)`` for the last processed
        dataset/node when multicore analysis is off, otherwise ``None``.

    Exits (``sys.exit``) on an unknown ``featureModel`` or
    ``detectionCriterion``, and when statistics are required but no node was
    processed with ``'timeDetection'``.

    Written for Python 2: a trailing comma after ``print(...)`` suppresses
    the newline.
    """
    resultByNode = {}
    totalExecutionTime = []
    # Stays None unless at least one node is processed with 'timeDetection';
    # checked after the loops so a missing value fails with a clear message.
    statistics = None

    for dataset in configuration['dataset']['list']:
        # Iterate on all the datasets chosen in the configuration list and
        # read the ground truth file.
        truth = groundTruth('GrounTruth/' + dataset + '.txt', fileType='csv')

        # Iterate on all the nodes chosen in the configuration file.
        for node in configuration['nodes']:
            # Read node dataset.
            print('Dataset {} - Node: {} loading ...'.format(dataset, node)),
            df = (pd.read_csv(configuration['dataset']['path'] + node + dataset + '.csv',
                              low_memory=False)
                  .dropna()
                  .drop('Unnamed: 0', axis=1))
            print('Done.')

            times = df['time'].astype('int')
            df = df.drop(['time'], axis=1)

            # Select the chosen features in the configuration file.
            # By default the dataset contains all the features.
            # ControlPlane: only the CP features are extracted.
            # DataPlane: the CP features are discarded.
            # CompleteFeatures: the dataset is used as is.
            if configuration['featureModel'] == 'ControlPlane':
                df = df[configuration['featureList']]
            elif configuration['featureModel'] == 'DataPlane':
                df = df.drop(configuration['featureList'], axis=1)
            elif configuration['featureModel'] == 'CompleteFeatures':
                pass
            else:
                sys.exit('Something wrong in configuration feature model')

            # Dataset normalization; zero-variance columns are removed first.
            df = df.loc[:, df.std() != 0]
            dfNormalized = normalize_matrix(df).dropna(axis=1)

            # The first sampleSkip rows seed the detector, the rest are streamed.
            bufferDF = dfNormalized[0:configuration['sampleSkip']]
            testDF = dfNormalized[configuration['sampleSkip']:]

            # Anomaly DenStream initialization with the parameters in the
            # configuration file.
            aden = DenStream(lamb=configuration['denstreamParameters']['lambda'],
                             epsilon=configuration['denstreamParameters']['epsilon'],
                             beta=configuration['denstreamParameters']['beta'],
                             mu=configuration['denstreamParameters']['mu'],
                             startingBuffer=bufferDF,
                             tp=configuration['denstreamParameters']['tp'])
            aden.runInitialization()

            # Iterate on all the rows in the dataset and run .runOnNewSample().
            # The algorithm tries to merge the new sample to the existing
            # clusters. Merged to a core-mmc: the sample is considered Normal
            # and the call returns False. Merged to an outlier-mc, or a new
            # outlier-mc is generated: the sample is considered Anomalous and
            # the call returns True.
            # NOTE(review): `times` is indexed from 0 while `testDF` starts at
            # row sampleSkip - confirm this timestamp offset is intended.
            print('Running algorithm ...'),
            startingSimulation = time.time()
            outputCurrentNode = []
            for sampleNumber in range(len(testDF)):
                sample = testDF.iloc[sampleNumber]
                result = aden.runOnNewSample(Sample(sample.values, times.iloc[sampleNumber]))
                outputCurrentNode.append(result)
            endSimulation = time.time() - startingSimulation
            totalExecutionTime.append(endSimulation)
            print('Done in {}'.format(endSimulation))

            # Buffer rows were never evaluated, so they are recorded as False.
            df['result'] = [False] * configuration['sampleSkip'] + outputCurrentNode

            # Depending on the detection criterion chosen in the configuration
            # file the script produces:
            # 1- Results and statistics compared to the ground truth if
            #    timeDetection is chosen.
            # 2- Results for each node if spatialDetection is chosen. To
            #    compare those with the ground truth, spatialPerformance.py
            #    has to be run.
            if configuration['detectionCriterion'] == 'spatialDetection':
                df['time'] = times
                df[['result', 'time']].to_csv(
                    'Data/ResultsSpatialDetection/' + configuration['featureModel']
                    + '/' + dataset + '_DENSTREAM_' + node + '.csv', sep=',')
            elif configuration['detectionCriterion'] == 'timeDetection':
                statistics = Statistics(node, truth)
                resultByNode[node + dataset] = statistics.getNodeResult(df, times, kMAX=5)
            else:
                sys.exit('Error detectionCriterion')

    multicoreAnalysis = configuration['multicoreAnalysis']['ON'] == 'YES'

    # Both reporting paths below call methods on `statistics`; fail early and
    # explicitly instead of raising NameError after an output file has
    # already been opened for writing.
    if statistics is None and (multicoreAnalysis
                               or configuration['detectionCriterion'] == 'timeDetection'):
        sys.exit('No statistics available: reporting requires detectionCriterion '
                 '"timeDetection" and at least one processed dataset/node')

    # Print result on file if multicoreAnalysis ON. Used only for grid
    # optimization. Very long task.
    if multicoreAnalysis:
        path = "DataPlane/"
        # Common prefix of the three result files of this parameter pair.
        outputPrefix = ("Results/" + path
                        + str(configuration["algorithmParameters"]["lambda"]) + "_"
                        + str(configuration["algorithmParameters"]["beta"]))

        with open(outputPrefix + "_PRF.json", "w") as outputfile:
            json.dump(statistics.getPrecisionRecallFalseRate(resultByNode, kMAX=5, plot=False),
                      outputfile, indent=4, sort_keys=True)

        resultdelay = statistics.getDelay(resultByNode, kMAX=5, plot=False)

        record = {}

        for row in range(len(resultdelay[0])):
            record['k' + str(row + 1)] = list(resultdelay[0][row])

        for row in range(len(resultdelay[1])):
            record['hop' + str(row)] = list(resultdelay[1]['hop' + str(row)])

        with open(outputPrefix + "_delay.json", "w") as outputfile:
            json.dump(record, outputfile, indent=4, sort_keys=True)

        with open(outputPrefix + "_execTime.json", "w") as outputfile:
            json.dump({'execTime': totalExecutionTime}, outputfile, indent=4, sort_keys=True)

    else:
        # Compute statistics if time detection is chosen: the results are
        # compared with the ground truth, precision/recall are computed and
        # written to a "resultsKT_<featureModel>.json" file in the
        # "Visualization" folder.
        if configuration['detectionCriterion'] == 'timeDetection':
            resStatistics = statistics.getPrecisionRecallFalseRate(resultByNode, kMAX=5, plot=True)
            resDelay = statistics.getDelay(resultByNode, kMAX=5, plot=True)

            print(resStatistics)
            print(resDelay)

            resStatistics['Delay'] = resDelay[0][:, 0].tolist()
            resStatistics['errDelay'] = resDelay[1]['hop0'].tolist()

            with open('Visualization/resultsKT_' + configuration['featureModel'] + '.json',
                      'w') as outfile:
                json.dump(resStatistics, outfile, indent=2)

            print('Time: {}'.format(np.sum(totalExecutionTime)))

        # Return all the variables of the last processed dataset/node.
        return aden, truth, df, times, dfNormalized