def drawAllCheckinPlotsByVisitingClassesUsingDemography(model, **conf):
    """Save one check-in plot per location, with one filled curve per demography.

    Every entry of ``model.locationsCheckinsMap`` that has check-ins is
    processed: check-ins are counted per (demography id, bin), and each
    demography is drawn with ``plt.fill_between`` in the colour stored on the
    user object.  The figure title is the location's visiting probability.

    The image path is built as
    ``conf['plotsFolder'] + 'byVisitingClassesUsingDemography/' + CLASS + '/' + locationId + '.png'``
    where CLASS is the value returned by
    ``Location.getLocationClassBasedOnVisitingProbability``.

    model -- object exposing ``locationsCheckinsMap`` and ``userMap``.
    conf  -- keyword options; only ``conf['plotsFolder']`` is read here.
    """
    outputRoot = conf['plotsFolder'] + 'byVisitingClassesUsingDemography/'
    for locationId, locationInfo in model.locationsCheckinsMap.iteritems():
        # Locations without any check-ins produce no plot.
        if not locationInfo['checkins']:
            continue
        locationObject = Location.getObjectFromDict(locationInfo['object'])
        locationClass = Location.getLocationClassBasedOnVisitingProbability(locationObject)
        plotsFile = '%s%s/%s' % (outputRoot, locationClass, locationId + '.png')
        FileIO.createDirectoryForFile(plotsFile)

        # demography id -> {bin (int) -> number of check-ins}
        countsByDemography = defaultdict(dict)
        # demography id -> plot colour (last value seen wins)
        colorByDemography = {}
        for binData in locationInfo['checkins'].itervalues():
            for rawBin, users in binData.iteritems():
                binId = int(rawBin)
                for user in users:
                    userObject = model.userMap[user]['object']
                    demographyId = userObject['demography_id']
                    colorByDemography[demographyId] = userObject['demography_color']
                    binCounts = countsByDemography[demographyId]
                    binCounts[binId] = binCounts.get(binId, 0) + 1

        for demographyId, binCounts in countsByDemography.iteritems():
            orderedBins = sorted(binCounts.keys())
            orderedCounts = [binCounts[binId] for binId in orderedBins]
            # smooth() may return more samples than it was given, so the
            # result is cut back to the number of bins.
            plt.fill_between(orderedBins,
                             smooth(orderedCounts, 10)[:len(binCounts)],
                             color=colorByDemography[demographyId],
                             alpha=0.65)
        plt.title(str(locationObject.visitingProbability))
        plt.savefig(plotsFile)
        plt.clf()
def performanceWithSpamDetection(generateData):
    """Generate, or plot, spamness over time for several spam-detection ratios.

    Ten iterations are run for each detection ratio in ``[0.0, 0.4, 0.9]``
    (spammer share fixed at 0.015).  Each (iteration, ratio) pair has its own
    experiment file under ``spamModelFolder + 'performanceWithSpamDetection/'``.

    generateData -- when true, each experiment file is removed and regenerated
                    by calling ``run(**conf)``; nothing is plotted.
                    When false, the existing files are read, the ``spammmess``
                    values are pooled per (ratio, time step, ranking id),
                    averaged, smoothed, and one figure per spam-filtered
                    ranking method is saved as
                    ``performanceWithSpamDetection_RANKINGID.png``.
    """
    experimentData = defaultdict(dict)
    ratios = [0.0, 0.4, 0.9]
    marker = {0.0: 's', 0.4: 'o', 0.9: 'd'}
    spammerPercentages = [0.015, 0.015, 0.015]
    for iteration in range(10):
        for spamDetectionRatio, spammerPercentage in zip(ratios, spammerPercentages):
            experimentFileName = spamModelFolder + 'performanceWithSpamDetection/%s/%0.3f' % (iteration, spamDetectionRatio)
            print(experimentFileName)
            if generateData:
                model = MixedUsersModel()
                conf = {'model': model,
                        'numberOfTimeSteps': 100,
                        'addUsersMethod': User.addUsersUsingRatioWithSpamDetection,
                        'analysisMethods': [(Analysis.measureRankingQuality, 1)],
                        'ratio': {'normal': 1 - spammerPercentage, 'spammer': spammerPercentage},
                        'rankingMethods': [RankingModel.latestMessages,
                                           RankingModel.latestMessagesSpamFiltered,
                                           RankingModel.popularMessages,
                                           RankingModel.popularMessagesSpamFiltered],
                        'spamDetectionRatio': spamDetectionRatio,
                        'experimentFileName': experimentFileName}
                # Start from a clean slate before regenerating this experiment.
                GeneralMethods.runCommand('rm -rf %s' % experimentFileName)
                run(**conf)
            else:
                dataForRatio = experimentData[spamDetectionRatio]
                for data in FileIO.iterateJsonFromFile(experimentFileName):
                    for ranking_id in data['spammmess']:
                        timeStep = data['currentTimeStep']
                        if timeStep not in dataForRatio:
                            dataForRatio[timeStep] = defaultdict(list)
                        # Pool the values of all iterations for this time step.
                        dataForRatio[timeStep][ranking_id] += data['spammmess'][ranking_id]
    if generateData:
        return

    # detection ratio -> {'x': [time steps], ranking id: [mean spamness per step]}
    sdr = {}
    for spamDetectionRatio in sorted(experimentData.keys()):
        dataToPlot = defaultdict(list)
        # Walk the time steps chronologically: the smoothing below and the
        # removal of the first 10 samples both treat the series as ordered in
        # time, which plain dict iteration order does not guarantee.
        for timeUnit in sorted(experimentData[spamDetectionRatio]):
            dataToPlot['x'].append(timeUnit)
            for ranking_id in experimentData[spamDetectionRatio][timeUnit]:
                dataToPlot[ranking_id].append(np.mean(experimentData[spamDetectionRatio][timeUnit][ranking_id]))
        sdr[spamDetectionRatio] = dataToPlot

    # Only the spam-filtered rankings are plotted; the unfiltered pair
    # (LATEST_MESSAGES, POPULAR_MESSAGES) was the alternative considered.
    for ranking_id in [RankingModel.LATEST_MESSAGES_SPAM_FILTERED, RankingModel.POPULAR_MESSAGES_SPAM_FILTERED]:
        for spamDetectionRatio in ratios:
            print('%s %s' % (ranking_id, spamDetectionRatio))
            series = sdr[spamDetectionRatio]
            # smooth() may return more samples than it was given; trim to len(x).
            dataY = smooth(series[ranking_id], 8)[:len(series['x'])]
            # Discard the first 10 samples and shift the x axis so it starts at 0.
            dataX, dataY = series['x'][10:], dataY[10:]
            shiftedX = [x - 10 for x in dataX]
            print('x %s' % (shiftedX,))
            if spamDetectionRatio == 0.0:
                label = '%s' % (labels[ranking_id])
            else:
                label = '%s (%d' % (labels[ranking_id].replace('Filtering', 'Detection'), spamDetectionRatio * 100) + '%)'
            print('%s %s' % (ranking_id, dataY))
            plt.plot(shiftedX, dataY, label=label, lw=1, marker=marker[spamDetectionRatio])
        plt.ylim(ymin=0, ymax=1)
        plt.xlim(xmin=0, xmax=75)
        plt.legend()
        plt.xlabel('Time', fontsize=16, fontweight='bold')
        plt.ylabel('Spamness', fontsize=16, fontweight='bold')
        # NOTE(review): bare savefig is a module-level name defined outside
        # this block (not plt.savefig) -- kept as in the original.
        savefig('performanceWithSpamDetection_%s.png' % ranking_id)
        plt.clf()
def plotDailyDistributionForLattices(timeFrame, file=dailyDistribution):
    """Plot the mean hourly check-in profile of every lattice in a file.

    Each JSON record read from ``file % timeFrame`` is expected to carry an
    id under ``'llid'`` and, under ``'c'``, a mapping day -> {hour -> value}
    whose hour keys are the strings '0'..'23'.  For every record the values
    are averaged per hour across days, smoothed with
    ``smooth(..., 3, 'flat')``, and saved to ``images/LLID.png``.

    timeFrame -- value substituted into the ``file`` path template.
    file      -- path template containing one ``%`` placeholder.
    """
    hoursOfDay = range(24)
    for lattice in FileIO.iterateJsonFromFile(file % timeFrame):
        print(lattice['llid'])
        # hour (as string) -> values observed for that hour on each day
        valuesByHour = dict((str(hour), []) for hour in hoursOfDay)
        for hourlyValues in lattice['c'].itervalues():
            for hour, value in hourlyValues.iteritems():
                valuesByHour[hour].append(value)
        dataX = sorted(int(hour) for hour in valuesByHour)
        hourlyMeans = [np.mean(valuesByHour[str(hour)]) for hour in dataX]
        plt.plot(dataX, smooth(hourlyMeans, 3, 'flat'))
        plt.savefig('images/%s.png' % lattice['llid'])
        plt.clf()
def performanceAsNoOfGlobalPayloadsVary(generateData):
    """Generate, or plot, spamness as the number of global spammer payloads varies.

    Ten iterations are run for every payload count in ``range(1, 500)``; each
    (iteration, payload count) pair has its own experiment file under
    ``spamModelFolder + 'performanceAsNoOfGlobalPayloadsVary/'``.
    ``Spammer.globalPayloads`` is reset to ``None`` before each experiment.

    generateData -- when true, each experiment file is removed and regenerated
                    via ``run(**conf)``; nothing is plotted.
                    When false, the files are read, spamness is averaged per
                    ranking id and payload count across iterations, and a
                    semilog-x plot is saved as
                    ``performanceAsNoOfGlobalPayloadsVary.png``.
    """
    experimentData = defaultdict(dict)
    for iteration in range(10):
        for noOfGlobalSpammerPayloads in range(1, 500):
            Spammer.globalPayloads = None
            relativeName = 'performanceAsNoOfGlobalPayloadsVary/%s/%0.3f' % (iteration, noOfGlobalSpammerPayloads)
            experimentFileName = spamModelFolder + relativeName
            print(experimentFileName)
            if generateData:
                model = MixedUsersModel()
                conf = {'model': model,
                        'numberOfTimeSteps': 10,
                        'addUsersMethod': User.addUsersUsingRatio,
                        'analysisMethods': [(Analysis.measureRankingQuality, 1)],
                        'ratio': {'normal': 0.985, 'spammer': 0.015},
                        'noOfGlobalSpammerPayloads': noOfGlobalSpammerPayloads,
                        'rankingMethods': [RankingModel.latestMessages,
                                           RankingModel.latestMessagesDuplicatesRemoved,
                                           RankingModel.popularMessages],
                        'experimentFileName': experimentFileName}
                GeneralMethods.runCommand('rm -rf %s' % experimentFileName)
                run(**conf)
                continue
            # ranking id -> all spamness values found in this experiment file
            spamnessByRanking = defaultdict(list)
            for data in FileIO.iterateJsonFromFile(experimentFileName):
                for ranking_id, spamness in data['spammmess'].iteritems():
                    spamnessByRanking[ranking_id] += spamness
            experimentData[iteration][noOfGlobalSpammerPayloads] = spamnessByRanking
    if generateData:
        return

    # ranking id -> {payload count -> [per-iteration mean spamness]}
    realDataY = defaultdict(dict)
    for iteration in experimentData:
        dataY = defaultdict(list)
        dataX = sorted(experimentData[iteration])
        for perct in dataX:
            for ranking_id, values in experimentData[iteration][perct].iteritems():
                dataY[ranking_id].append(np.mean(values))
        for ranking_id, iterationMeans in dataY.iteritems():
            for x, y in zip(dataX, iterationMeans):
                realDataY[ranking_id].setdefault(x, []).append(y)

    # NOTE: dataX and dataY below are the values left over from the last
    # pass of the aggregation loop above.
    for ranking_id in dataY:
        if ranking_id not in labels:
            continue
        meanSpamness = [np.mean(realDataY[ranking_id][x]) for x in dataX]
        # The first 20 points are plotted raw; only the remainder is smoothed.
        dy = meanSpamness[:20] + list(smooth(meanSpamness[20:]))
        plt.semilogx(dataX, dy[:len(dataX)], label=labels[ranking_id], lw=1,
                     marker=RankingModel.marker[ranking_id])
    plt.xlabel('Payloads Per Spam Group', fontsize=15, fontweight='bold')
    plt.ylabel('Spamness', fontsize=15, fontweight='bold')
    plt.legend(loc=4)
    plt.savefig('performanceAsNoOfGlobalPayloadsVary.png')
    plt.clf()
def drawAllCheckinPlotsByVisitingClasses(model, **conf):
    """Save one check-ins-per-bin line plot for every location with check-ins.

    For each entry of ``model.locationsCheckinsMap`` that has check-ins, the
    number of check-ins is summed per bin over all days and plotted against
    the bin number.  The figure title is the location's visiting probability.

    The image path is built as
    ``conf['plotsFolder'] + 'byVisitingClasses/' + CLASS + '/' + locationId + '.png'``
    where CLASS is the value returned by
    ``Location.getLocationClassBasedOnVisitingProbability``.

    model -- object exposing ``locationsCheckinsMap``.
    conf  -- keyword options; only ``conf['plotsFolder']`` is read here.
    """
    plotsFolder = conf['plotsFolder'] + 'byVisitingClasses/'
    for locationId, location in model.locationsCheckinsMap.iteritems():
        # Locations without any check-ins produce no plot.
        if not location['checkins']:
            continue
        locationObject = Location.getObjectFromDict(location['object'])
        locationClass = Location.getLocationClassBasedOnVisitingProbability(locationObject)
        plotsFile = '%s%s/%s' % (plotsFolder, locationClass, locationId + '.png')
        FileIO.createDirectoryForFile(plotsFile)

        # bin (int) -> total number of check-ins across all days
        checkinsByBins = defaultdict(int)
        for binData in location['checkins'].itervalues():
            for binId, checkins in binData.iteritems():
                checkinsByBins[int(binId)] += len(checkins)

        # Plot the bins in ascending order.  Dict iteration order is not
        # guaranteed, and a line plot drawn from unordered x values would
        # jump back and forth along the axis.
        orderedBins = sorted(checkinsByBins)
        orderedCounts = [checkinsByBins[binId] for binId in orderedBins]
        plt.plot(orderedBins, smooth(orderedCounts, 1))
        plt.title(str(locationObject.visitingProbability))
        plt.savefig(plotsFile)
        plt.clf()
def plotRunningTime(graphType):
    """Plot running time against interval width for one graph type.

    Every JSON record read from ``runningTimesFolder % graphType`` contributes
    one curve: its ``'analysis'`` entries supply ``intervalInSeconds`` (x,
    converted to days) and ``runningTime`` (y, smoothed with window argument
    15).  Records with a truthy ``'linear'`` flag are labelled 'linear' and
    drawn with 'o' markers, the others 'logarithmic' with 'x' markers.
    The figure is saved to ``images/rt_GRAPHTYPE.png``.

    graphType -- value substituted into the input path and the output file
                 name; also used as the plot title.
    """
    secondsPerDay = 24 * 60 * 60
    for data in FileIO.iterateJsonFromFile(runningTimesFolder % graphType):
        samples = [(entry['intervalInSeconds'], entry['runningTime']) for entry in data['analysis']]
        dataX, dataY = zip(*samples)
        # NOTE(review): if intervalInSeconds is an int, Python 2 performs
        # integer division here -- confirm this truncation is intended.
        dataX = [seconds / secondsPerDay for seconds in dataX]
        if data['linear']:
            label, marker = 'linear', 'o'
        else:
            label, marker = 'logarithmic', 'x'
        # smooth() may return more samples than it was given; trim to len(x).
        dataY = smooth(dataY, 15)[:len(dataX)]
        plt.plot(dataX, dataY, marker=marker, label=label, lw=2)
    plt.xlim(xmin=3)
    plt.legend(loc=2)
    plt.title(graphType)
    plt.xlabel('Interval width (days)')
    plt.ylabel('Running Time (s)')
    plt.savefig('images/rt_%s.png' % graphType)