Python FeatureExtractor.augmented_frequency 예제들

프로그래밍 언어: Python

클래스/타입: FeatureExtractor

메소드/함수: augmented_frequency

hotexamples.com에서의 예제들: 2

Python FeatureExtractor.augmented_frequency - 2개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한 Python audio 패키지의 FeatureExtractor.augmented_frequency 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

FeatureExtractor(26)

augmented_frequency(2)

set_class(2)

frequency(2)

get_featurelist_from_nparray(2)

idf(2)

ExtractSIFTForGrid(1)

saveFilenames(1)

set_frameslength(1)

saveWordList(1)

get_velocity(1)

saveAllVectors(1)

processAllDocs(1)

Extract_All_Features(1)

authorFinder(1)

__featureextractor__(1)

similarityFinder(1)

예제 #1

파일 보기

파일: NaiveBayes.py 프로젝트: vairylein/LUSmidterm3

def test(doc, name):
	"""Classify each line of the file ``doc[2]`` and write the predictions to ``name``.

	A per-word weight table is built by passing ``doc[:2]`` through
	``FeatureExtractor.frequency``, ``augmented_frequency`` and ``idf``.
	Each input line is split on tabs: column 0 is used as the line ID and
	column 4 as the text (``<s>`` / ``</s>`` markers are removed and the
	text is split on single spaces).  For every class the log-weights of
	the words are summed, the log-prior is added, and the label with the
	highest score is written as ``ID<TAB>label``.

	Args:
		doc: indexable; ``doc[:2]`` is handed to ``FeatureExtractor.frequency``
			and ``doc[2]`` is the path of the tab-separated file to classify.
		name: path of the output file (overwritten).
	"""
	labels = ["C2", "IKEA_EN", "IKEA_IT"]
	priors = [0.33, 0.33, 0.329]  # based on number of documents (original author's note)
	# Every class receives the same fixed penalty for a word missing from the table.
	unknown_penalty = math.log(0.5)

	# The `with` block guarantees the output file is closed even when feature
	# extraction, parsing or math.log raises part-way through.
	with open(name, "w") as out:
		# Original author's notes on the pipeline: smoothed frequency counts ->
		# augmented frequencies (document-size aware) -> idf weighting.
		frequencies0 = FeatureExtractor.frequency(doc[:2], True, True)
		frequencies1 = FeatureExtractor.augmented_frequency(frequencies0)
		frequencies = FeatureExtractor.idf(frequencies1)

		with open(doc[2], "r") as mefile:
			for line in mefile:
				fields = line.split('\t')
				ID = fields[0]
				words = fields[4].replace("<s>", "").replace("</s>", "").split(" ")

				# Accumulate from zero and add the prior last, one entry per label.
				scores = [0, 0, 0]
				for word in words:
					if word in frequencies:
						weights = frequencies[word]
						for i in range(3):
							scores[i] += math.log(weights[i])
					else:
						for i in range(3):
							scores[i] += unknown_penalty

				b = [score + math.log(prior) for score, prior in zip(scores, priors)]

				# index(max(...)) picks the first label on ties.
				proposal = labels[b.index(max(b))]
				out.write(ID + "\t" + proposal + "\n")

예제 #2

파일 보기

파일: NaiveBayes.py 프로젝트: vairylein/LUSmidterm3

def validate(doc, name):
	"""Classify the labelled file ``doc[2]`` and write predictions plus metrics to ``name``.

	A per-word weight table is built by passing ``doc[:2]`` through
	``FeatureExtractor.frequency``, ``augmented_frequency`` and ``idf``.
	Each input line is split on tabs: column 1 is the expected label and
	column 4 the text (``<s>`` / ``</s>`` markers are removed and the text
	is split on single spaces).  For every line ``proposal<TAB>expected``
	is written and the proposal is printed.  After the last line the number
	of correct predictions, the mean per-class precision, the mean
	per-class recall and the F1 of those two means are appended.

	Ratios are computed with float division; a ratio whose denominator is
	zero (class never predicted / never expected, or precision and recall
	both zero) is reported as 0.0 instead of raising ZeroDivisionError.

	Args:
		doc: indexable; ``doc[:2]`` is handed to ``FeatureExtractor.frequency``
			and ``doc[2]`` is the path of the tab-separated labelled file.
		name: path of the output file (overwritten).
	"""
	labels = ["C2", "IKEA_EN", "IKEA_IT"]
	priors = [0.33, 0.33, 0.328]  # based on number of documents (original author's note)
	# Every class receives the same fixed penalty for a word missing from the table.
	unknown_penalty = math.log(0.5)

	def ratio(numerator, denominator):
		# float() forces true division under Python 2, where int / int floors.
		if denominator == 0:
			return 0.0
		return float(numerator) / denominator

	correct = 0
	number = 0
	# Per-label true-positive / false-positive / false-negative counters.
	tp = dict.fromkeys(labels, 0)
	fp = dict.fromkeys(labels, 0)
	fn = dict.fromkeys(labels, 0)

	# The `with` block guarantees the output file is closed even when feature
	# extraction, parsing or math.log raises part-way through.
	with open(name, "w") as out:
		# Original author's notes on the pipeline: smoothed frequency counts ->
		# augmented frequencies (document-size aware) -> idf weighting.
		frequencies0 = FeatureExtractor.frequency(doc[:2])
		frequencies1 = FeatureExtractor.augmented_frequency(frequencies0)
		frequencies = FeatureExtractor.idf(frequencies1)

		with open(doc[2], "r") as mefile:
			for line in mefile:
				fields = line.split('\t')
				ID = fields[1]
				words = fields[4].replace("<s>", "").replace("</s>", "").split(" ")

				# Accumulate from zero and add the prior last, one entry per label.
				scores = [0, 0, 0]
				for word in words:
					if word in frequencies:
						weights = frequencies[word]
						for i in range(3):
							scores[i] += math.log(weights[i])
					else:
						for i in range(3):
							scores[i] += unknown_penalty

				# Alternatives noted by the original author: scoring without
				# priors, or multiplying the scores by the priors.
				b = [score + math.log(prior) for score, prior in zip(scores, priors)]

				# index(max(...)) picks the first label on ties.
				proposal = labels[b.index(max(b))]
				out.write(proposal + "\t" + ID + "\n")

				print(proposal)

				if ID == proposal:
					tp[ID] += 1
					correct += 1
				else:
					# An expected label outside the known set is not counted
					# as a false negative for any class.
					if ID in fn:
						fn[ID] += 1
					fp[proposal] += 1

				number += 1

		print(fn["C2"])

		precisions = [ratio(tp[label], tp[label] + fp[label]) for label in labels]
		recalls = [ratio(tp[label], tp[label] + fn[label]) for label in labels]

		avgpre = sum(precisions) / len(labels)
		avgrec = sum(recalls) / len(labels)

		out.write("\n\ncorrect: " + str(correct) + "out of" + str(number))
		out.write("\nprecision: " + str(avgpre))
		out.write("\nrecall: " + str(avgrec))
		out.write("\nF1: " + str(2 * ratio(avgpre * avgrec, avgpre + avgrec)))