def getLocationDistributionPlots(place): for clustering in iteraterUserClusterings(place): for location in locationToUserMapIterator(place): print clustering[0], location['location'] fileName=placesImagesFolder%place['name']+str(clustering[0])+'/'+ location['location'].replace(' ', '_').replace('.', '+')+'.png' FileIO.createDirectoryForFile(fileName) getPerLocationDistributionPlots(clustering, location, fileName)
def drawAllCheckinPlotsByVisitingClassesUsingDemography(model, **conf):
    """For every location with checkins, plot per-demography smoothed checkin
    counts by time bin, filed under a sub-folder named after the location's
    visiting-probability class.  One PNG per location."""
    plotsFolder = conf['plotsFolder']+'byVisitingClassesUsingDemography/'
    for locationId, location in model.locationsCheckinsMap.iteritems():
        if location['checkins']:
            locationObject = Location.getObjectFromDict(location['object'])
            plotsFile = '%s%s/%s'%(plotsFolder, Location.getLocationClassBasedOnVisitingProbability(locationObject),locationId+'.png')
            FileIO.createDirectoryForFile(plotsFile)
            # checkinsByBinsAndDemographies[demographyId][bin] -> checkin count;
            # demographColorMap remembers the plot color declared for each demography.
            checkinsByBinsAndDemographies = defaultdict(dict)
            demographColorMap = {}
            for day, binData in location['checkins'].iteritems():
                for bin, checkins in binData.iteritems():
                    bin=int(bin)
                    for user in checkins:
                        demographyId = model.userMap[user]['object']['demography_id']
                        demographColorMap[demographyId] = model.userMap[user]['object']['demography_color']
                        if bin not in checkinsByBinsAndDemographies[demographyId]: checkinsByBinsAndDemographies[demographyId][bin]=0
                        checkinsByBinsAndDemographies[demographyId][bin]+=1
            for demographyId, data in checkinsByBinsAndDemographies.iteritems():
                # smooth() may return more points than inputs, hence the [:len(data)] trim
                # so x (sorted bins) and y stay the same length.
                plt.fill_between(sorted(data.keys()), smooth([data[k] for k in sorted(data)], 10)[:len(data)], color=demographColorMap[demographyId], alpha=0.65)
            plt.title(str(locationObject.visitingProbability))
            plt.savefig(plotsFile)
            plt.clf()
def writeClusterKML():
    """Write a single KML marking the top feature-locations of every user cluster.

    NOTE(review): ``place`` and ``noOfFeatures`` are neither parameters nor
    locals here — they must come from module/enclosing scope, otherwise this
    raises NameError.  Confirm they are defined where this function runs.
    """
    kml = SpotsKML()
    # '%' is left-associative: the folder template is spliced into the literal
    # first, then place['name'] fills the '%s' carried inside placesAnalysisFolder.
    outputKMLFile='%s/clusters.kml'%placesAnalysisFolder%place['name']
    for data in FileIO.iterateJsonFromFile(placesUserClusterFeaturesFile%place['name']):
        clusterId, color, features = data
        # f[0] is a lattice id with '_' for spaces; f[2] is the title shown on the point.
        kml.addLocationPointsWithTitles([(getLocationFromLid(f[0].replace('_', ' ')), f[2]) for f in features[:noOfFeatures]], color=color)
    FileIO.createDirectoryForFile(outputKMLFile)
    kml.write(outputKMLFile)
def writeUserClusterKMLs(place):
    """Emit one KML per user cluster of ``place``, marking its first five locations."""
    clustering = getUserClustering(place, place.get('k'))
    colorMap = clustering[3]
    details = getUserClusteringDetails(place, clustering)
    # Iterate clusters in numeric id order.
    for clusterId, info in sorted(details.iteritems(), key=lambda pair: int(pair[0])):
        kml = SpotsKML()
        points = []
        for lid, name, _ in info['locations'][:5]:
            points.append((getLocationFromLid(lid), unicode(name).encode('utf-8')))
        kml.addLocationPointsWithTitles(points, color=colorMap[clusterId])
        outputKMLFile = placesKMLsFolder % place['name'] + 'locations/userClusters/%s/%s.kml' % (str(clustering[0]), str(clusterId))
        FileIO.createDirectoryForFile(outputKMLFile)
        kml.write(outputKMLFile)
def getLocationsCheckinDistribution(place):
    """Plot (log-log) the distribution of per-location checkin counts for ``place``."""
    checkinDistribution = {}
    for location in locationToUserMapIterator(place):
        # Total checkins at a location = number of epochs across every
        # (user, day, day-bin) bucket.
        count = 0
        for user, userVector in location['users'].iteritems():
            for day, dayVector in userVector.iteritems():
                for db, epochs in dayVector.iteritems():
                    count += len(epochs)
        checkinDistribution[location['location']] = count
    dataX, dataY = getDataDistribution(checkinDistribution.values())
    plt.loglog(dataX, dataY)
    outputFile = placesAnalysisFolder % place['name'] + 'locationsCheckinDistribution.png'
    FileIO.createDirectoryForFile(outputFile)
    plt.savefig(outputFile)
def writeARFFFile(place):
    """Build an ARFF clustering file of user -> per-location checkin counts.

    Only locations with at least 50 checkins contribute, and users with fewer
    than place['minUserCheckins'] total checkins are dropped.  The temporary
    ARFF produced by ARFF.writeARFFForClustering is then moved into place.
    """
    userVectors = defaultdict(dict)
    locationToUserMap = dict((l['location'], l) for l in locationToUserMapIterator(place, minCheckins=50))
    for lid, locationData in locationToUserMap.iteritems():
        attribute = lid.replace(' ', '_')  # ARFF attribute names cannot contain spaces
        for user, userVector in locationData['users'].iteritems():
            # Count epochs over every (day, day-bin) bucket for this user/location.
            userVectors[user][attribute] = sum(len(userVector[d][db]) for d in userVector for db in userVector[d])
    # In Python 2, keys() already returns a fresh list, so deleting entries while
    # iterating over it is safe — the original '[:]' copy was redundant.
    for user in userVectors.keys():
        if sum(userVectors[user].itervalues()) < place['minUserCheckins']:
            del userVectors[user]
    arffFile = ARFF.writeARFFForClustering(userVectors, place['name'])
    outputFileName = getARFFFileName(place)
    FileIO.createDirectoryForFile(outputFileName)
    GeneralMethods.runCommand('mv %s %s' % (arffFile, outputFileName))
def plotGaussianGraphsForClusters(place):
    """Plot per-cluster normal curves of checkin-time distributions for each
    location of ``place``; one PNG per location, grouped by location type."""
    for location in Analysis.iterateLocationsWithClusterDetails(place):
        total = location['total']
        clustersInfo = location['clustersInfo']
        for clusterId, data in clustersInfo.iteritems():
            mean, std, clusterSum, color = data['mean'], data['std'], data['clusterSum'], data['color']
            # A zero std would degenerate the normal curve, so a small random
            # spread is substituted purely for plotting purposes.
            if std!=0: plotNorm(clusterSum/total, mean, std, color=color, label=str(clusterId))
            else: plotNorm(clusterSum/total, mean, random.uniform(0.1, 0.5), color=color, label=str(clusterId))
        plt.xlim(xmin=0, xmax=23); plt.legend()
        plt.title(location['name'])
        # Make the location id filesystem-safe before embedding it in the path.
        fileName = '/'.join([placesGaussianImagesFolder%place['name'], getLocationType(location), location['location'].replace(' ', '_').replace('.', '+')+'.png'])
        print fileName
        FileIO.createDirectoryForFile(fileName)
        plt.savefig(fileName), plt.clf()
def hashtag_groups_dot_files(association_measure_file=f_fisher_exact_association_measure): output_file_format = fld_google_drive_data_analysis%GeneralMethods.get_method_id()+\ '/'+association_measure_file.split('/')[-1]+'/%s.dot' for line_no, data in\ enumerate(FileIO.iterateJsonFromFile(association_measure_file, remove_params_dict=True)): _, _, edges = data graph = nx.Graph() for edge in edges: u,v,attr_dict = edge u = unicode(u).encode('utf-8') v = unicode(v).encode('utf-8') graph.add_edge(u,v, attr_dict) output_file = output_file_format%line_no print 'Writing file: ', output_file FileIO.createDirectoryForFile(output_file) nx.write_dot(graph, output_file)
def drawAllCheckinPlotsByVisitingClasses(model, **conf):
    """Plot smoothed checkin counts per time-bin for every location with checkins.

    One PNG per location, filed under a sub-folder named after the location's
    visiting-probability class.
    """
    plotsFolder = conf['plotsFolder'] + 'byVisitingClasses/'
    for locationId, location in model.locationsCheckinsMap.iteritems():
        if not location['checkins']:
            continue
        locationObject = Location.getObjectFromDict(location['object'])
        plotsFile = '%s%s/%s' % (plotsFolder, Location.getLocationClassBasedOnVisitingProbability(locationObject), locationId + '.png')
        FileIO.createDirectoryForFile(plotsFile)
        # Aggregate checkin counts per time bin across all days.
        checkinsByBins = defaultdict(int)
        for day, binData in location['checkins'].iteritems():
            for bin, checkins in binData.iteritems():
                checkinsByBins[int(bin)] += len(checkins)
        # BUG FIX: dict iteration order is arbitrary, so plotting keys()/values()
        # directly drew the line in a scrambled x order; plot sorted bins instead
        # (key/value pairing is preserved).
        bins = sorted(checkinsByBins)
        plt.plot(bins, smooth([checkinsByBins[b] for b in bins], 1))
        plt.title(str(locationObject.visitingProbability))
        plt.savefig(plotsFile)
        plt.clf()
def plot_locations_influence_on_world_map(ltuo_model_id_and_hashtag_tag, noOfInfluencers=10, percentage_of_locations=0.15):
    """For each (model, hashtag-tag) pair, draw great-circle arcs from the top
    influenced locations to a fixed set of input cities on a world map.

    One PNG per (model, hashtag_tag, city).  ``percentage_of_locations`` caps
    how many neighbor locations are drawn; ``noOfInfluencers`` is currently
    unused (the loader is called with noOfInfluencers=None).
    """
    # Fixed set of (lattice-id, label) cities to plot influence maps for.
    input_locations = [
        ('40.6000_-73.9500', 'new_york'),
        ('33.3500_-118.1750', 'los_angeles'),
        ('29.7250_-97.1500', 'austin'),
        ('30.4500_-95.7000', 'college_station'),
        ('-22.4750_-42.7750', 'rio'),
        ('51.4750_0.0000', 'london'),
        ('-23.2000_-46.4000', 'sao_paulo')
    ]
    for model_id, hashtag_tag in ltuo_model_id_and_hashtag_tag:
        tuo_location_and_tuo_neighbor_location_and_locations_influence_score = \
            Experiments.load_tuo_location_and_tuo_neighbor_location_and_locations_influence_score(model_id, hashtag_tag, noOfInfluencers=None, influence_type=InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE)
        for input_location, label in input_locations:
            for location, tuo_neighbor_location_and_locations_influence_score in \
                    tuo_location_and_tuo_neighbor_location_and_locations_influence_score:
                if input_location==location:
                    # Rebind input_location from lattice id to (lat, lon) point.
                    input_location = getLocationFromLid(input_location.replace('_', ' '))
                    output_file = fld_results%GeneralMethods.get_method_id() + '/%s_%s/%s.png'%(model_id, hashtag_tag, label)
                    # Keep only the top slice of neighbors; fall back to all of
                    # them when the percentage rounds down to zero.
                    number_of_outgoing_influences = int(len(tuo_neighbor_location_and_locations_influence_score)*percentage_of_locations)
                    if number_of_outgoing_influences==0: number_of_outgoing_influences=len(tuo_neighbor_location_and_locations_influence_score)
                    locations = zip(*tuo_neighbor_location_and_locations_influence_score)[0][:number_of_outgoing_influences]
                    locations = [getLocationFromLid(location.replace('_', ' ')) for location in locations]
                    if locations:
                        # Neighbor points in magenta; returnBaseMapObject gives us
                        # the Basemap instance m for drawing arcs.
                        _, m = plotPointsOnWorldMap(locations, resolution='c', blueMarble=False, bkcolor='#000000', c='#FF00FF', returnBaseMapObject=True, lw = 0)
                        for location in locations:
#                            if isWithinBoundingBox(location, PARTIAL_WORLD_BOUNDARY):
                            m.drawgreatcircle(location[1], location[0], input_location[1], input_location[0], color='#FAA31B', lw=1., alpha=0.5)
                        # The input city itself in blue, drawn on top.
                        plotPointsOnWorldMap([input_location], resolution='c', blueMarble=False, bkcolor='#000000', c='#003CFF', s=40, lw = 0)
                        FileIO.createDirectoryForFile(output_file)
                        print output_file
                        savefig(output_file)
                        plt.clf()
                    else:
                        # Nothing to draw: remove any stale output for this city.
                        GeneralMethods.runCommand('rm -rf %s'%output_file)
                    break
def getLocationPlots(place, clusterOVLType, type='scatter'):
    """Plot per-cluster hourly checkin distributions for each location of ``place``.

    type='scatter' scatters raw hour counts; type='normal' fits/plots a normal
    curve per cluster.  NOTE(review): ``clusterOVLType`` is currently unused —
    the OVL-based location iterator is commented out below.
    """
    clustering = getUserClustering(place, place.get('k'))
    validClusters = getUserClusteringDetails(place, clustering).keys()
    def scatterPlot(clustering, location, fileName):
        # Plot one series per cluster for this location and save to fileName.
        # Map each user seen at this location to their cluster id.
        userClusterMap = {}
        for clusterId, users in clustering[2]['clusters'].iteritems():
            for user in users:
                if user in location['users']: userClusterMap[user]=clusterId
        # scatterData[clusterId][hour] -> checkin count for that hour.
        scatterData = defaultdict(dict)
        clusterMap = clustering[3]
        for user, userVector in location['users'].iteritems():
            if user in userClusterMap:
                for d in userVector:
                    for db in userVector[d]:
                        # Hour of day, shifted by -6 — presumably a timezone
                        # adjustment; TODO confirm against the data's timezone.
                        for h in [(datetime.datetime.fromtimestamp(ep).hour-6)%24 for ep in userVector[d][db]]:
                            if h not in scatterData[userClusterMap[user]]: scatterData[userClusterMap[user]][h]=0
                            scatterData[userClusterMap[user]][h]+=1
        for cluster, clusterInfo in scatterData.iteritems():
            if cluster in validClusters:
                if type=='normal':
                    # Expand the histogram back into raw samples to estimate mean/std.
                    data = [k for k, v in clusterInfo.iteritems() for i in range(v)]
                    mean, std = np.mean(data), np.std(data)
                    # Zero std would degenerate the curve; substitute a small random spread.
                    if std!=0: plotNorm(sum(data), mean, std, color=clusterMap[cluster])
                    else: plotNorm(sum(data), mean, random.uniform(0.1, 0.5), color=clusterMap[cluster])
                elif type=='scatter':
                    plt.scatter(clusterInfo.keys(), clusterInfo.values(), color=clusterMap[cluster], label=cluster)
        plt.title('%s (%s)'%(location['name'],location['location'])),plt.legend()
        plt.xlim(xmin=0,xmax=24)
        plt.savefig(fileName), plt.clf()
    for location in locationToUserMapIterator(place, minCheckins=place['minLocationCheckinsForPlots']):
        print clustering[0], location['location']
        # Make the location id filesystem-safe before embedding it in the path.
        fileName=placesImagesFolder%place['name']+'%s/'%type+str(clustering[0])+'/'+ location['location'].replace(' ', '_').replace('.', '+')+'.png'
        FileIO.createDirectoryForFile(fileName)
        scatterPlot(clustering, location, fileName)
def plot_maps_for_every_hour():
    """Render cumulative world maps of hashtag occurrences per 15-minute lag.

    For two fixed hashtags, bucket occurrences into 15-minute time units within
    the first 24 hours of the highest-activity region, then — for each lag —
    plot the accumulated locations (colored by recency) in a two-row subplot
    figure saved as ./images/plot_maps_for_every_hour/<lag>.png.
    """
    MINUTES = 15
    hashtags = ['ripstevejobs', 'cnbcdebate']
    # Fixed subplot position per hashtag (two stacked rows).
    map_from_hashtag_to_subplot = dict([('ripstevejobs', 211), ('cnbcdebate', 212)])
    map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag = defaultdict(dict)
    for hashtag in hashtags:
        for hashtag_object in FileIO.iterateJsonFromFile('./data/%s.json'%hashtag):
            map_from_epoch_time_unit_to_tuples_of_location_and_epoch_occurrence_time = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtag_object), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
            tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time = sorted(map_from_epoch_time_unit_to_tuples_of_location_and_epoch_occurrence_time.iteritems(), key=itemgetter(0))
            # Only consider the first 24 hours after the earliest time unit.
            epoch_starting_time_unit = tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time[0][0]
            epoch_ending_time_unit = epoch_starting_time_unit+24*60*60
            for epoch_time_unit, tuples_of_location_and_epoch_occurrence_time in tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time:
                if epoch_time_unit<=epoch_ending_time_unit:
                    if tuples_of_location_and_epoch_occurrence_time:
                        epoch_lag = epoch_time_unit - epoch_starting_time_unit
                        tuples_of_location_and_epoch_occurrence_time = sorted(tuples_of_location_and_epoch_occurrence_time, key=itemgetter(1))
                        # Store (lattice id at 0.145 accuracy, lag of each occurrence).
                        map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag[epoch_lag][hashtag] = [(getLatticeLid(location, 0.145), epoch_occurrence_time-epoch_starting_time_unit)for location, epoch_occurrence_time in tuples_of_location_and_epoch_occurrence_time]
    map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag = defaultdict(list)
    # Start from a clean output directory.
    GeneralMethods.runCommand('rm -rf ./images/plot_maps_for_every_hour/')
    for epoch_lag in sorted(map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag):
        file_world_map_plot = './images/plot_maps_for_every_hour/%s.png'%(epoch_lag)
        print file_world_map_plot
        map_from_hashtag_to_tuples_of_location_and_epoch_lag = map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag[epoch_lag]
        # Accumulate occurrences up to (and including) the current lag.
        for hashtag, tuples_of_location_and_epoch_lag in map_from_hashtag_to_tuples_of_location_and_epoch_lag.iteritems():
            map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag[hashtag]+=tuples_of_location_and_epoch_lag
        for hashtag, accumulated_tuples_of_location_and_epoch_lag in map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag.iteritems():
            plt.subplot(map_from_hashtag_to_subplot[hashtag])
            # Per lattice location, keep the latest (max) occurrence lag.
            tuples_of_location_and_epoch_max_lag= [(location, max(zip(*iterator_of_tuples_of_location_and_epoch_lag)[1])) for location, iterator_of_tuples_of_location_and_epoch_lag in groupby(sorted(accumulated_tuples_of_location_and_epoch_lag, key=itemgetter(0)), key=itemgetter(0)) ]
            # Color encodes recency: smaller value = more recent occurrence.
            locations, colors = zip(*[(getLocationFromLid(location.replace('_', ' ')), (epoch_lag+MINUTES*60)-epoch_max_lag) for location, epoch_max_lag in sorted(tuples_of_location_and_epoch_max_lag, key=itemgetter(1), reverse=True)])
            plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap=matplotlib.cm.cool, lw = 0, vmax=epoch_lag+MINUTES*60)
            plt.title('%s (%s hours)'%(hashtag, (epoch_lag+MINUTES*60)/(60.*60)))
        FileIO.createDirectoryForFile(file_world_map_plot)
        plt.savefig(file_world_map_plot)
        plt.clf()
def utm_object_analysis():
    """Build an R data frame of (utm_id, #valid neighbors, mean shared-hashtag
    count per neighbor) and save it as a CSV-style .df file via rpy2."""
    ltuo_utm_id_and_num_of_neighbors_and_mean_common_h_count = []
    output_file = fld_google_drive_data_analysis%GeneralMethods.get_method_id()+'.df'
    # First pass: collect the set of utm ids present in the input file so that
    # neighbor references to absent ids can be filtered out below.
    so_valid_utm_ids = set()
    for utm_object in FileIO.iterateJsonFromFile(f_hashtags_by_utm_id, True):
        so_valid_utm_ids.add(utm_object['utm_id'])
    for utm_object in FileIO.iterateJsonFromFile(f_hashtags_by_utm_id, True):
        so_valid_nei_utm_ids = set(utm_object['mf_nei_utm_id_to_common_h_count']).intersection(so_valid_utm_ids)
        # NOTE(review): np.mean([]) yields nan (with a warning) when a utm id
        # has no valid neighbors — confirm downstream tolerates nan rows.
        mean_num_of_common_h_count = np.mean([utm_object['mf_nei_utm_id_to_common_h_count'][nei_utm_id] for nei_utm_id in so_valid_nei_utm_ids])
        ltuo_utm_id_and_num_of_neighbors_and_mean_common_h_count.append([utm_object['utm_id'], len(so_valid_nei_utm_ids), mean_num_of_common_h_count])
    utm_ids, num_of_neighbors, mean_common_h_count = zip(*ltuo_utm_id_and_num_of_neighbors_and_mean_common_h_count)
    # Ordered dict keeps the data-frame column order stable.
    od = rlc.OrdDict([
        ('utm_ids', robjects.StrVector(utm_ids)),
        ('num_of_neighbors', robjects.FloatVector(num_of_neighbors)),
        ('mean_common_h_count', robjects.FloatVector(mean_common_h_count))
    ])
    df = robjects.DataFrame(od)
    FileIO.createDirectoryForFile(output_file)
    print 'Saving df to: ', output_file
    df.to_csvfile(output_file)
def locations_at_top_and_bottom(model_ids, no_of_locations=5):
    """Write the first-N ("Bottom:") and last-N ("Top:") neighbor locations by
    pure influence score to a JSON report, per (input location, model)."""
    for model_id in model_ids:
        output_file_format = analysis_folder + '%s/' % (GeneralMethods.get_method_id()) + '%s/%s.json'
        input_locations = [
                           ('30.4500_-95.7000', 'college_station'),
                           ]
        scores_by_location = \
            Experiments.load_tuo_location_and_tuo_neighbor_location_and_pure_influence_score(model_id)
        for input_location, label in input_locations:
            for location, neighbor_scores in scores_by_location:
                if location != input_location:
                    continue
                output_file = output_file_format % (input_location, model_id)
                # Rebuild the report from scratch each run.
                GeneralMethods.runCommand('rm -rf %s' % output_file)
                FileIO.createDirectoryForFile(output_file)
                FileIO.writeToFileAsJson("Bottom:", output_file)
                for entry in neighbor_scores[:no_of_locations]:
                    FileIO.writeToFileAsJson(entry + [''], output_file)
                FileIO.writeToFileAsJson("Top:", output_file)
                for entry in reversed(neighbor_scores[-no_of_locations:]):
                    FileIO.writeToFileAsJson(entry + [''], output_file)
def plotHastagClasses(timeRange, folderType):
    """Plot, per classified hashtag, a 3-row figure: world map of occurrence
    lattices (top), per-activity-region time series (middle), and the full
    time series with regions overlaid (bottom).

    NOTE(review): ``timeRange`` is effectively unused — the input file is
    hard-coded to hashtagsFile%('testing_world','2_11') below; confirm intent.
    """
    def getFileName():
        # Endless-ish supply of unique two-letter image names (used only by
        # commented-out code below).
        for i in combinations('abcedfghijklmnopqrstuvwxyz',2): yield ''.join(i)+'.png'
    count=1
    for hashtagObject in FileIO.iterateJsonFromFile(hashtagsFile%('testing_world','%s_%s'%(2,11))):
        print count; count+=1
        classId = HashtagsClassifier.classify(hashtagObject)
        if classId!=None:
            # All classes are collapsed to 1 for output-folder purposes.
            classId = 1
            outputFile = hashtagsImagesHashtagsClassFolder%folderType+'%s/%s.png'%(classId, hashtagObject['h']); FileIO.createDirectoryForFile(outputFile)
            fileNameIterator = getFileName()
            timeUnits, timeSeries = getTimeUnitsAndTimeSeries(hashtagObject['oc'], timeUnit=HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)
            # Only the highest-activity region is plotted ('m' = magenta);
            # multi-region plotting is commented out.
            occurancesInActivityRegions = [[getOccuranesInHighestActiveRegion(hashtagObject), 'm']]
            currentMainRangeId = 0
            for occurances1, color1 in occurancesInActivityRegions:
                print outputFile
                # Middle row: time series of each activity region.
                ax = plt.subplot(312)
                subRangeId = 0
                for occurances, color in occurancesInActivityRegions:
                    if subRangeId==currentMainRangeId: color='m'
                    timeUnits, timeSeries = getTimeUnitsAndTimeSeries(occurances, timeUnit=HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)
#                    if len(timeUnits)<24:
#                        difference = 24-len(timeUnits)
#                        timeUnits=list(timeUnits)+[timeUnits[-1]+(i+1)*HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS for i in range(difference)]
#                        timeSeries=list(timeSeries)+[0 for i in range(difference)]
#                    print len(timeUnits[:24]), len(timeSeries[:24])
                    plt.plot_date([datetime.datetime.fromtimestamp(t) for t in timeUnits], timeSeries, '-o', c=color)
                    subRangeId+=1
                plt.setp(ax.get_xticklabels(), rotation=10, fontsize=7)
                # Bottom row: whole-history series with the regions overlaid.
                ax=plt.subplot(313)
                subRangeId = 0
                timeUnits, timeSeries = getTimeUnitsAndTimeSeries(hashtagObject['oc'], timeUnit=HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)
                plt.plot_date([datetime.datetime.fromtimestamp(t) for t in timeUnits], timeSeries, '-')
                for occurances, color in occurancesInActivityRegions:
                    if subRangeId==currentMainRangeId: color='m'
                    timeUnits, timeSeries = getTimeUnitsAndTimeSeries(occurances, timeUnit=HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)
                    plt.plot_date([datetime.datetime.fromtimestamp(t) for t in timeUnits], timeSeries, '-o', c=color)
                    subRangeId+=1
                plt.setp(ax.get_xticklabels(), rotation=10, fontsize=7)
                # Top row: occurrence lattices on the world map, colored by count.
                plt.subplot(311)
                occurancesGroupedByLattice = sorted( [(getLocationFromLid(lid.replace('_', ' ')), len(list(occs))) for lid, occs in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in occurances1], key=itemgetter(0)), key=itemgetter(0))], key=itemgetter(1) )
                points, colors = zip(*occurancesGroupedByLattice)
                cm = matplotlib.cm.get_cmap('cool')
                if len(points)>1:
                    sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw=0, alpha=1.0)
                    plt.colorbar(sc)
                else: sc = plotPointsOnWorldMap(points, c='m', lw=0)
                plt.title(hashtagObject['h']+ '(%d)'%len(occurancesGroupedByLattice))
                # Best-effort save: some figures fail (e.g. odd characters in
                # paths); failures are deliberately ignored.
                try:
                    plt.savefig(outputFile); plt.clf()
                except: pass
                currentMainRangeId+=1
def temp():
    """Ad-hoc script: plot cumulative-hour occurrence maps for the
    'americanhorrorstory' hashtag, one PNG per hourly interval."""
    hashtags, MINUTES = [], 60
    for hashtagObject in FileIO.iterateJsonFromFile('americanhorrorstory'):
        if hashtagObject['h']=='americanhorrorstory':
            print unicode(hashtagObject['h']).encode('utf-8'), len(hashtagObject['oc'])
            # Bucket occurrences of the highest-activity region into hourly units.
            occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject, timeUnit=60*60), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
            totalOccurances = []
            for interval, t in enumerate(sorted(occsDistributionInTimeUnits)):
                occs = occsDistributionInTimeUnits[t]
                if occs:
                    fileName = '../images/plotsOnMap/%s/%s.png'%(hashtagObject['h'], (interval+1)*MINUTES); FileIO.createDirectoryForFile(fileName)
                    print fileName
                    # One point per lattice cell, all drawn in magenta.
                    occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), 'm') for lid, occ in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in occs], key=itemgetter(0)), key=itemgetter(0))]
                    occurancesGroupedByLattice = sorted(occurancesGroupedByLattice, key=itemgetter(1))
                    points, colors = zip(*occurancesGroupedByLattice)
                    plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0)
                    plt.savefig(fileName)
                    plt.clf()
            # Ad-hoc script: stop the process after the first matching hashtag.
            exit()
def plotGraphsForHashtag(hashtag):
    """Interactively show cumulative occurrence maps for ``hashtag`` in
    5-minute intervals (up to 2 hours) of its highest-activity region."""
    for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'):
        MINUTES = 5
        if hashtagObject['h']==hashtag:
            print unicode(hashtagObject['h']).encode('utf-8'), len(hashtagObject['oc'])
            occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
            # Occurrences accumulate across intervals, so each map shows
            # everything seen so far, not just the current bucket.
            totalOccurances = []
            for interval, t in enumerate(sorted(occsDistributionInTimeUnits)):
                occs = occsDistributionInTimeUnits[t]
                totalOccurances+=occs
                if occs:
                    fileName = '../images/plotsOnMap/%s/%s.png'%(hashtagObject['h'], (interval+1)*MINUTES); FileIO.createDirectoryForFile(fileName)
                    print fileName
                    # One point per lattice cell, all drawn in magenta.
                    occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), 'm') for lid, occs in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in totalOccurances], key=itemgetter(0)), key=itemgetter(0))]
                    occurancesGroupedByLattice = sorted(occurancesGroupedByLattice, key=itemgetter(1))
                    points, colors = zip(*occurancesGroupedByLattice)
                    plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0)
                    # Shown interactively; saving to fileName is disabled.
                    plt.show()
                    plt.clf()
                # Stop after the first two hours of intervals.
                if (interval+1)*MINUTES>=120: break
            break
def getSimulationFile(self):
    """Return the simulation-data file path for this model's configuration,
    ensuring its parent directory exists."""
    conf = self.conf
    file = conf['simulationDataFolder'] + '%s/%s_%s_%s' % (
        self.modelType,
        conf['noOfDaysOfSimulation'],
        conf['noOfBinsPerDay'],
        conf['noOfAreas'],
    )
    FileIO.createDirectoryForFile(file)
    return file
def writeARFFFile(place):
    """Generate the clustering ARFF for ``place`` from its user vectors and
    move it to the canonical per-place location."""
    vectors = GenerateDataFiles.getUserVectors(place)
    tempArffFile = ARFF.writeARFFForClustering(vectors, place['name'])
    destination = placesARFFFile % place['name']
    FileIO.createDirectoryForFile(destination)
    GeneralMethods.runCommand('mv %s %s' % (tempArffFile, destination))
def plotHashtagsInOutGraphs(timeRange, outputFolder):
    """For each location, plot two world maps — where its hashtags come from
    (in-links) and where they go (out-links) — colored by closeness score."""
    def plotPoints(links, xlabel):
        # Scatter the linked locations colored by score (0..1), then mark the
        # current location itself in black.
        cm = matplotlib.cm.get_cmap('cool')
        points, colors = zip(*sorted([(getLocationFromLid(k.replace('_', ' ')), v)for k, v in links.iteritems()], key=itemgetter(1)))
        sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw=0, vmin=0, vmax=1)
        plotPointsOnWorldMap([getLocationFromLid(locationObject['id'].replace('_', ' '))], c='k', s=20, lw=0)
        plt.xlabel(xlabel), plt.colorbar(sc)
    counter=1
    for locationObject in FileIO.iterateJsonFromFile(hashtagLocationInAndOutTemporalClosenessGraphFile%(outputFolder, '%s_%s'%timeRange)):
        point = getLocationFromLid(locationObject['id'].replace('_', ' '))
        outputFile = hashtagsImagesLocationInfluencersFolder+'%s.png'%getLatticeLid([point[1], point[0]], ACCURACY); FileIO.createDirectoryForFile(outputFile)
        print counter;counter+=1
        # Skip locations already rendered on a previous run.
        if not os.path.exists(outputFile):
            if locationObject['in_link'] and locationObject['out_link']:
                print outputFile
                plt.subplot(211)
                plt.title(locationObject['id'])
                plotPoints(locationObject['in_link'], 'Gets hashtags from these locations')
                plt.subplot(212)
                plotPoints(locationObject['out_link'], 'Sends hashtags to these locations')
                plt.savefig(outputFile); plt.clf()
def plotTimeSeries(hashtagObject):
    """Plot a hashtag's full occurrence time series (top) and its highest-active
    region's series (bottom), saved as one PNG.

    Only plots when the active region is the first one AND the source lattice
    has at least MIN_OCCURRENCES_TO_DETERMINE_SOURCE_LATTICE occurrences.
    NOTE(review): ``outputFolder`` is not a parameter or local here — it must
    come from module scope; confirm it is defined where this runs.
    """
    def getDataToPlot(occ):
        # Convert occurrences to an epoch-aligned series, padding missing
        # time units with zero counts, and return (timeUnits, counts).
        occurranceDistributionInEpochs = getOccurranceDistributionInEpochs(occ)
        startEpoch, endEpoch = min(occurranceDistributionInEpochs, key=itemgetter(0))[0], max(occurranceDistributionInEpochs, key=itemgetter(0))[0]
        dataX = range(startEpoch, endEpoch, TIME_UNIT_IN_SECONDS)
        occurranceDistributionInEpochs = dict(occurranceDistributionInEpochs)
        for x in dataX:
            if x not in occurranceDistributionInEpochs: occurranceDistributionInEpochs[x]=0
        return zip(*sorted(occurranceDistributionInEpochs.iteritems(), key=itemgetter(0)))
    outputFile = hashtagsImagesFirstActiveTimeSeriesAnalysisFolder%outputFolder+'%s.png'%(hashtagObject['h']); FileIO.createDirectoryForFile(outputFile)
    print unicode(outputFile).encode('utf-8')
    timeUnits, timeSeries = getDataToPlot(hashtagObject['oc'])
    occurencesInActiveRegion, isFirstActiveRegion = getOccuranesInHighestActiveRegion(hashtagObject, True)
    timeUnitsForActiveRegion, timeSeriesForActiveRegion = getDataToPlot(occurencesInActiveRegion)
    lid, count = getSourceLattice(hashtagObject['oc'])
    if isFirstActiveRegion and count>=MIN_OCCURRENCES_TO_DETERMINE_SOURCE_LATTICE:
        ax=plt.subplot(211)
        plt.plot_date(map(datetime.datetime.fromtimestamp, timeUnits), timeSeries, '-')
        # Active-region markers: red if not the first region, black otherwise
        # (the guard above means only the black branch is reachable here).
        if not isFirstActiveRegion: plt.plot_date(map(datetime.datetime.fromtimestamp, timeUnitsForActiveRegion), timeSeriesForActiveRegion, 'o', c='r')
        else: plt.plot_date(map(datetime.datetime.fromtimestamp, timeUnitsForActiveRegion), timeSeriesForActiveRegion, 'o', c='k')
        plt.setp(ax.get_xticklabels(), rotation=30, fontsize=10)
        plt.title(hashtagObject['h'] + '(%s)'%count)
        ax=plt.subplot(212)
        plt.plot_date(map(datetime.datetime.fromtimestamp, timeUnitsForActiveRegion), timeSeriesForActiveRegion, '-')
        plt.setp(ax.get_xticklabels(), rotation=30, fontsize=10)
        plt.savefig(outputFile); plt.clf()
def plotSharingProbabilityAndTemporalClosenessScoresOnMap(timeRange, outputFolder):
    """Show, per lattice, its sharing-probability map (top) and temporal-closeness
    map (bottom) interactively.

    NOTE(review): the figure is only shown (plt.show()) — the output path is
    computed and its directory created, but plt.savefig is never called, so no
    PNG is written.  Confirm whether a savefig was intended.
    """
    i = 1
    for latticeObject in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%(outputFolder,'%s_%s'%timeRange)):
        latticePoint = getLocationFromLid(latticeObject['id'].replace('_', ' '))
        # Re-derive the lattice id from the (lon, lat)-swapped point.
        latticeId = getLatticeLid([latticePoint[1], latticePoint[0]], LATTICE_ACCURACY)
        plt.subplot(211)
        plt.title(latticeId)
        LatticeGraphPlots.plotLatticeSharingProbabilityOnMap(LatticeGraph.typeSharingProbability, latticeObject)
        plt.subplot(212)
        LatticeGraphPlots.plotLatticeTemporalClosenessScoresOnMap(LatticeGraph.typeTemporalCloseness, latticeObject)
        plt.show()
        outputFile = hashtagsImagesGraphAnalysisFolder%outputFolder+'%s_and_%s/%s.png'%(LatticeGraph.typeSharingProbability['id'], LatticeGraph.typeTemporalCloseness['id'], latticeId); FileIO.createDirectoryForFile(outputFile)
        print i, outputFile; i+=1