Example #1
def getHistogramFromFile(fin, bucketSize, fout):
	fi = open(fin, 'r')
	totalFile = 0
	totalSize = 0
	bucket 	= dict()

	while True:
		line = fi.readline()
		if not line: break
		line = line.strip()
		if line == '': continue

		field = line.split()
		size = int(field[0])
		
		# if size > 102400: continue		#only consider file less than 100KB
		# if size > 0: continue

		bucket_id = size/bucketSize
		if not bucket_id in bucket:
			bucket[bucket_id] = 1
		else:
			bucket[bucket_id] += 1

		totalSize += size
		totalFile += 1
		# if totalFile > 100: break

	fi.close()

	print 'Total files\t'+str(totalFile)
	print 'Total size\t'+humanReadable(totalSize)
	print 'Avg size  \t'+humanReadable(totalSize * 1.0/totalFile)
	
	#write to csv file -> draw chart by Excel	
	fo = open(fout, 'w')
	for key in sorted(bucket.iterkeys()):
		fo.write(str(key)+','+
				 str(bucket[key])+','+
				 const.humanReadable(key*bucketSize)+','+
				 const.humanReadable((key+1)*bucketSize)+'\n')
		# fo.write(str(key)+','+str(bucket[key])+','+str(key*bucketSize)+','+str((key+1)*bucketSize)+'\n')
	fo.close()
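
Both histogram examples rely on a humanReadable helper (referenced both bare and as const.humanReadable), which is not defined in this excerpt. Below is a minimal sketch of what such a 1024-based formatter might look like (an assumption, not the project's actual code), followed by a hypothetical call to getHistogramFromFile; the file names and the 1 MB bucket size are illustrative placeholders.

def humanReadable(size):
	# Hypothetical stand-in for const.humanReadable: format a byte count
	# using 1024-based units.
	for unit in ['B', 'KB', 'MB', 'GB']:
		if size < 1024.0:
			return '%.1f%s' % (size, unit)
		size /= 1024.0
	return '%.1f%s' % (size, 'TB')

# Hypothetical invocation: 'sizes.txt' lists one size in bytes as the first
# field of each line; sizes are bucketed into 1 MB bins and written to CSV.
getHistogramFromFile('sizes.txt', 1024*1024, 'histogram.csv')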
Example #2
def getHistogram(path, bucketSize, fout):
	#check if path exists
	if not os.path.exists(path):
		print "path doesn't exist"
		return

	#dictionary of buckets
	bucket 	= dict()
	minSize = sys.maxint
	maxSize = -1
	avgSize = 0
	nFile 	= 0

	#scan through all files, get their sizes, put them in bucket
	for dirPath, subdirs, files in os.walk(path):
		#bypass scan folders & meta files
		if dirPath == const.SCAN_DIR or dirPath == const.FILESYSTEM_PATH: 
			continue

		for f in files:
			# print dirPath + '/' + f
			size = os.path.getsize(dirPath + '/' + f)
			minSize = min(size, minSize)
			maxSize = max(size, maxSize)
			avgSize += size
			nFile += 1

			bucket_id = size/bucketSize
			if not bucket_id in bucket:
				bucket[bucket_id] = 1
			else:
				bucket[bucket_id] += 1
		
	#write to csv file -> draw chart by Excel	
	fo = open(fout, 'w')
	for key in sorted(bucket.iterkeys()):
		fo.write(str(key)+','+
				 str(bucket[key])+','+
				 const.humanReadable(key*bucketSize)+','+
				 const.humanReadable((key+1)*bucketSize)+'\n')
		# fo.write(str(key)+','+str(bucket[key])+','+str(key*bucketSize)+','+str((key+1)*bucketSize)+'\n')
	fo.close()

	#write statistics
	print 'min size:\t', const.humanReadable(minSize)
	print 'avg size:\t', const.humanReadable(float(avgSize)/nFile)
	print 'max size:\t', const.humanReadable(maxSize)
	print 'total size:\t', const.humanReadable(avgSize)
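
A hypothetical invocation of the directory-walking variant, assuming const.FILESYSTEM_PATH points at the root of a generated file system; the 4 KB bucket size and the output file name are illustrative placeholders.

# Hypothetical call: histogram of on-disk file sizes in 4 KB buckets.
getHistogram(const.FILESYSTEM_PATH, 4096, 'fs_histogram.csv')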
Example #3
File: fs.py Project: hsn6/xcode
def createFileSystem(filesystemPath, total, sizeDist, repProb, nRepDist, mutProb, nMutDist, mutantLevel, quota = None, logFile = None):
	global fsPath
	fsPath = filesystemPath
	if fsPath[-1] != '/': fsPath = fsPath + '/' 	#filesystem path must end with '/'

	start_time = time.time()

	#recreate file system root
	os.system('rm -rf '+fsPath)									#be careful w/ this command	
	if not os.path.exists(fsPath):
		os.makedirs(fsPath)

	#create folders. estimated 5-84 files per folder
	#todo: use a better heuristic for the folder count
	nDir = random.randint(total/100, total/5)+1					#+1 to avoid ending up with 0 folders
	for i in range(nDir):
		dirPath = fsPath + str(i)
		if not os.path.exists(dirPath):
			os.makedirs(dirPath)

	#open log files
	fu = open(fsPath + const.FS_META_UNIQUE, 'w')
	fm = open(fsPath + const.FS_META_MUTANT, 'w')
	fr = open(fsPath + const.FS_META_REPLIC, 'w')
	hFile = fsPath + const.FS_META_HISTOGRAM

	#add file to file system
	#todo: recheck this: x*(1 + repProb*E[nRep] + mutProb*E[nMut]) = 1, with x = probability of unique files in the file system
	# totalUnique = int(round(total/(1+repProb*expect(nRepDist)+mutProb*expect(nMutDist))))			
	uniProb = 1 - repProb - mutProb
	nUnique = int(round(uniProb * total))
	totalUnique = 0
	totalMutant = 0
	totalRep = 0
	totalSize = 0
	count = dict()												#book-keeping: number of files generated by each distribution

	#print out input params
	log([hFile, logFile], 'Creating_FS', True)
	majorDistPercent = 0
	majorDist = -1
	for k,v in sizeDist.iteritems():
		if v > majorDistPercent: 
			majorDistPercent = v
			majorDist = k.name
	inputs  = '\tmajorDist:' + str(majorDist) + '\tdistName:' + str(const.DIST_NAME[majorDist]) + '\n'
	inputs += '\tdupRate:' + str(repProb+mutProb) + '\n'
	inputs += '\tfsPath:' + str(filesystemPath) + '\n'
	inputs += '\ttotal:' + str(total) + '\n'
	inputs += '\tsizeDist:\n'
	for k, v in sizeDist.iteritems():
		inputs += '\t\t%:'+str(v)+'\tdist:'+ const.DIST_NAME[int(k.name)] + '\tparams:' + str(k.params) + '\tmean_size:' + humanReadable(expect(k)) + '\n' 
	inputs += '\trepProb:'+ str(repProb) + '\n'
	inputs += '\tnRepDist:'+ str(nRepDist.name) + '\tparams:' + str(nRepDist.params) + '\n'		
	inputs += '\tmutProb:'+ str(mutProb) + '\n'			
	inputs += '\tnMutDist:'+ str(nMutDist.name) + '\tparams:' + str(nMutDist.params) + '\n'		
	inputs += '\tmutantLevel:'+ str(mutantLevel) + '\n'
	inputs += '\tquota:'+ (humanReadable(quota) if quota is not None else 'None') + '\n'		#quota may be None
	inputs += '\tlogFile:'+ str(logFile) + '\n'
	log([hFile, logFile], inputs)
	log(hFile, '\n\t------')
	
	#transform dist
	sizeDist = dict(sizeDist)		#create another copy to protect the input param
	transformDist(sizeDist)

	#start creating files
	fh = open(fsPath + const.FS_META_HISTOGRAM, 'a')
	minSize = sys.maxint
	maxSize = 0
	for i in range(nUnique):
		selectedDist = selectDist(sizeDist, random.random())	#generate random real number -> get the distribution
		
		#create 1 unique file
		fcreated = []
		size = 0
		while size <= 0:
			size = int(round(drawSize(selectedDist)))							#draw file size from distribution
		minSize = min(minSize, size)
		maxSize = max(maxSize, size)
		fcreated.extend(createFiles(1, size, getRandomDir(nDir), i, fu))	#create 1 file, put it in random dir, return file name
		totalUnique += 1

		#create mutant file(s)
		nMutant = drawNumber(mutProb/uniProb, nMutDist)					#draw number of mutants; may return 0
		if nMutant > 0:											#create mutant(s), put it in random dir, return list of mutant	
			fcreated.extend(createMutants(fcreated[0], i, nMutant, mutantLevel, totalMutant, nDir, fm))
			totalMutant += nMutant

		#create rep file(s)
		nRep = drawNumber(repProb/uniProb, nRepDist)					#draw number of replication
		if nRep > 0:											#create rep (duplicate of unique+mutant), put it in random dir	
			fcreated.extend(createRep(fcreated, nRep, totalRep, nDir, fr))
			totalRep += nRep

		#update statistics info & logs
		# print i, totalUnique, totalMutant, totalRep, totalUnique + totalMutant + totalRep

		if selectedDist not in count:
			count[selectedDist] = 1+nMutant+nRep
		else:
			count[selectedDist] = count[selectedDist]+1+nMutant+nRep

		for f in fcreated:
			fh.write('x:\t'+f+'\t'+str(size)+'\n')

		totalSize += size * (1+nRep+nMutant)
		if quota is not None and totalSize > quota: 
			logStr = '\tOver quota after file ' + str(totalUnique + totalMutant + totalRep) + 'th. quota = ' + humanReadable(quota)
			log(logFile, logStr)
			fh.write(logStr+'\n')
			break

		#check point
		if ((i+1) % 100 == 0):
			print '\tcreated:' + str(i+1) + '/' + str(nUnique) + '\t' + currentTime()
		# print 'size = %d\tnMutant = %d\tnRep = %d' %(size, nMutant, nRep)

	#close log files
	fu.close()
	fm.close()
	fr.close()	
	fh.write('\t------\n\n')
	fh.close()

	#print statistics info
	totalFile = float(totalUnique + totalMutant + totalRep)

	for k, v in sorted(count.iteritems()):
		log([hFile, logFile], '\tdist:'+ const.DIST_NAME[int(k.name)] + '\tparams:' + str(k.params) + '\tfiles:' +str(v) + '\t%:' + str(round(v/totalFile, 2)))
	log([hFile, logFile], '\tfolders:' + str(nDir))

	log([hFile, logFile], '\ttotal_files:' + str(int(totalFile)))
	log([hFile, logFile], '\tunique:'+ str(totalUnique) +'\t%:'+ str(round(totalUnique/totalFile, 2))) 
	log([hFile, logFile], '\tmutant:'+ str(totalMutant) +'\t%:'+ str(round(totalMutant/totalFile, 2)))
	log([hFile, logFile], '\trep:'+ str(totalRep) +'\t%:'+ str(round(totalRep/totalFile, 2)))
	log([hFile, logFile], '\ttotal_size:' + str(const.humanReadable(totalSize)))
	log([hFile, logFile], '\tmin_size:' + str(const.humanReadable(minSize)))
	log([hFile, logFile], '\tavg_size:' + str(const.humanReadable(totalSize/totalFile)))
	log([hFile, logFile], '\tmax_size:' + str(const.humanReadable(maxSize)))
	log([hFile, logFile], '\tcreate_time:'+ str(round(time.time() - start_time, 3)) +'\tseconds', True)
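
A hedged usage sketch for createFileSystem. The size and count distributions are project-specific objects (they expose .name and .params and are consumed by expect, drawSize, and drawNumber), and their constructors are not part of this excerpt; lognormalSizeDist and geomCountDist below are purely hypothetical placeholders, as are the path, quota, and log file name.

# Hypothetical call: ~10,000 files under /tmp/synthfs with repProb=0.2 and
# mutProb=0.1, so uniProb = 0.7 and roughly 7,000 unique files are drawn,
# capped at a 1 GB quota. The distribution objects are assumed to come from
# the project's own helpers and are placeholders here.
createFileSystem('/tmp/synthfs', 10000,
				 {lognormalSizeDist: 1.0},		#single size distribution covering 100% of files
				 0.2, geomCountDist,			#replica probability and replica-count distribution
				 0.1, geomCountDist,			#mutant probability and mutant-count distribution
				 1,								#mutantLevel
				 quota=1024**3,
				 logFile='create_fs.log')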