Example #1
0
def processScrapedToOpenClosed(printProgress = False):
	def statusOut():
		out = str(pctComplete) + '% complete: ' + \
			'File ' + str(numFilesProcessed + 1) + '/' + str(numFilesTotal) + \
			' (' + fileTime + '.' + courseDataExt + ')' + \
			alignRightSpacer + \
			eta
		return out
	filesToAnalyze = fileUtils.getAllFiles(dataDir = courseDataDir, dataExt = courseDataExt, latestFirst = False)
	try:
		with open(openClosedRawFileLoc, 'r') as existingDataFile:
			existingData = cPickle.load(existingDataFile)
			fileNamesToAnalyze = [fileUtils.getFileNameFromPath(fileName) for fileName in filesToAnalyze]
			fileNamesToAnalyze = list(set(fileNamesToAnalyze).difference(set(existingData)))
			filesToAnalyze = [courseDataDir + '/' + fileName + '.' + courseDataExt for fileName in fileNamesToAnalyze]
			allData = existingData
	except IOError:
		allData = {}
	numFilesProcessed = 0
	numFilesTotal = len(filesToAnalyze)
	pluralText = 'files'
	if numFilesTotal == 1:
		pluralText = 'file'
	if printProgress:
		print 'Processing open/closed data:', numFilesTotal, pluralText, 'to analyze.'
	if numFilesTotal > 0:
		startTime = time.clock()
		alignRightSpacer = ''
		for fileToAnalyze in filesToAnalyze:
			fileTime = fileUtils.getFileNameFromPath(fileToAnalyze)
			timePassed = time.clock() - startTime
			numfilesLeft = numFilesTotal - numFilesProcessed
			pctComplete = numFilesProcessed * 100 / numFilesTotal
			if numFilesProcessed == 0:
				eta = ''
			else:
				etaTime = (timePassed / numFilesProcessed) * numfilesLeft
				etaTimePretty = time.strftime('%H:%M:%S', time.gmtime(etaTime))
				eta = ' (ETA: ' + etaTimePretty + ')'
			try:
				consoleWidth = int(consoleSize()[0])
			except ValueError:
				consoleWidth = 80
			while len(statusOut()) != consoleWidth:
				if len(statusOut()) < consoleWidth:
					alignRightSpacer += ' '
				else:
					if len(alignRightSpacer) == 0:
						break
					alignRightSpacer = alignRightSpacer[:-1]
			if printProgress:
				dynPrint(statusOut())
			courseDataRaw = fileUtils.unpickle(fileToAnalyze)
			courseDataProc = getOpenClosedStats(courseDataRaw)
			allData[fileTime] = courseDataProc
			numFilesProcessed += 1
		with open(openClosedRawFileLoc, 'w') as dataOut:
			cPickle.dump(allData, dataOut)
Example #2
0
	if numFilesTotal > 0:
		openClosedData = fileUtils.unpickle(openClosedRawFileLoc)
		saneOpenClosedFileKeys = sanityCheck(openClosedData)
		saneOpenClosedFileKeysSet = set(saneOpenClosedFileKeys)
		saneFileKeys = saneOpenClosedFileKeysSet.intersection(set(fileUtils.getAllFiles()))
		existingFileKeys = allData['_filesProcessed']
		unprocessedFileKeysSet = set(saneFileKeys).difference(existingFileKeys)
		print len(unprocessedFileKeysSet), 'unprocessed files'
		unprocessedFileKeys = sorted(list(unprocessedFileKeysSet))
		if len(unprocessedFileKeys) > 0:
			for fileTime in unprocessedFileKeys:
				fileLoc = courseDataDir + '/' + fileTime + '.' + courseDataExt
				print fileLoc
				try:
					subjDataRaw = fileUtils.unpickle(fileLoc)
				except IOError:
					continue # with the next file if the one being opened doesn't exist
				subjDataProc = getSubjSeatStats(subjList, subjDataRaw)
				allData[fileTime] = subjDataProc
				allData['_filesProcessed'].add(fileTime)
				numFilesProcessed += 1
			with open(subjectSeatsFileLoc, 'w') as dataOut:
				cPickle.dump(allData, dataOut)
		print 'Done.'

if __name__ == '__main__':
	processScrapedToOpenClosed(printProgress = True)
	dynPrint('Done. Adding diff data to open/closed data...\n')
	processOpenClosedToDiff()
	print 'Done. Processing raw data to section data...'
	processScrapedToSubjectSeats(printProgress = True)