def investigateBill(billSource, billTitle):
    """Print the KL comparison between one bill and that year's Hansard.

    Prints ``billTitle``, loads the bill via ``retrieveBillBow`` and the
    Hansard data for the bill's start year via ``hansardHandler.budgetToBow``,
    then prints whatever ``cosineComparison.KLbow`` returns for the pair.

    Relies on module-level names defined outside this function:
    ``coalition``, ``hansardSource``, ``retrieveBillBow``, ``hansardHandler``
    and ``cosineComparison``.

    Args:
        billSource: Path prefix for the bill file. It is concatenated
            directly with ``billTitle``, so it presumably needs its own
            trailing separator -- confirm against callers.
        billTitle: Bill name; ``".json"`` is appended to form the filename.

    Returns:
        None. Results are written to stdout.

    Raises:
        AssertionError: If ``budgetToBow`` does not return exactly one entry
            for the single requested year.
    """
    print(billTitle)
    billFile = billSource + billTitle + ".json"

    interestedParty = coalition
    partyFunction = hansardHandler.returnSpecificParty(interestedParty)
    partyBillBow, startYear = retrieveBillBow(billFile, interestedParty)

    # startYear is passed as both the first and last year because only that
    # single year is of interest. The positional flags (True, True, False) are
    # defined by hansardHandler.budgetToBow, which is not visible here.
    _, hansardBows, _ = hansardHandler.budgetToBow(
        startYear, startYear, None, partyFunction,
        True, True, False, hansardSource)

    # A single-year request is expected to yield exactly one entry.
    assert len(hansardBows) == 1, (
        "expected exactly one Hansard bow for year {0}, got {1}".format(
            startYear, len(hansardBows)))
    hansardBow = hansardBows[0]

    print(cosineComparison.KLbow(hansardBow, partyBillBow))
# Summarise the per-period data returned by hansardHandler.budgetToBow for
# 2015-2017: for each entry, print its reference label, the number of keys it
# holds and the sum of the values stored under those keys.
import sys

# hansardHandler is not on the default import path, so its directory is put
# at the front of sys.path before the import below.
sys.path.insert(0, '/Users/jeremypattison/LargeDocument/scripts/dataAggregating')

import hansardHandler

bowDirectory = "/Users/jeremypattison/LargeDocument/ResearchProjectData/house_hansard/byParty/bowNormalisedAndStemmed/"

partyFunction = hansardHandler.filenameToPartyInCharge
# The positional flags (True, True, False) are interpreted by
# hansardHandler.budgetToBow; their meaning is not visible from this script.
_, dataset, reference = hansardHandler.budgetToBow(
    2015, 2017, None, partyFunction, True, True, False, bowDirectory)

print(len(dataset))
print(reference)

for i, data in enumerate(dataset):
    # reference is indexed (not zipped) so that a length mismatch between
    # dataset and reference still raises IndexError instead of being hidden.
    print(reference[i])
    print("words types = {0}".format(len(data)))
    # Presumably data maps each word to its count -- TODO confirm.
    total = sum(data[word] for word in data)
    print("word totals = {0}".format(total))
import transcriptHandler
import cosineComparison
import graphs

# --- Input locations -------------------------------------------------------
# Earlier, non-by-party Hansard directory, kept for reference:
#hansardSource = "/Users/jeremypattison/LargeDocument/ResearchProjectData/house_hansard/bowNormalisedStemmed/"
hansardSource = "/Users/jeremypattison/LargeDocument/ResearchProjectData/house_hansard/byParty/bowNormalisedAndStemmed/"
transcriptSource = "/Users/jeremypattison/LargeDocument/ResearchProjectData/PMSpeeches/bowNormalisedStemmed"

# --- Year range passed to both loaders --------------------------------------
initialYear = 2008
finalYear = 2015

# --- Hansard data -----------------------------------------------------------
# hansardHandler is expected to be imported earlier in the file; it is not
# imported in this section. The positional flags are interpreted by
# budgetToBow, whose definition is not visible here.
partyFunction = hansardHandler.filenameToPartyInCharge
_, hansardBows, hansardReference = hansardHandler.budgetToBow(
    initialYear,
    finalYear,
    None,
    partyFunction,
    True,
    True,
    False,
    hansardSource,
)
# Debugging aid: uncomment to inspect the loaded Hansard data.
#print(hansardBows)
#print(hansardReference)

# --- Transcript data --------------------------------------------------------
# Same year range; the two None arguments and the trailing flags are passed
# positionally and defined by getTranscriptsBudgetDateTechnique.
_, transcriptBows, transcriptReference = transcriptHandler.getTranscriptsBudgetDateTechnique(
    initialYear,
    finalYear,
    None,
    None,
    transcriptSource,
    False,
    False,
    True,
)
# Debugging aid: uncomment to inspect the loaded transcript data.
#print(transcriptBows)
#print(transcriptReference)

# Accumulators, initialised empty here; nothing visible in this section
# fills them.
klValues = []
xAxis = []