Esempio n. 1
0
def getLocationDistributionPlots(place):
    for clustering in iteraterUserClusterings(place):
        for location in locationToUserMapIterator(place): 
            print clustering[0], location['location']
            fileName=placesImagesFolder%place['name']+str(clustering[0])+'/'+ location['location'].replace(' ', '_').replace('.', '+')+'.png'
            FileIO.createDirectoryForFile(fileName)
            getPerLocationDistributionPlots(clustering, location, fileName)
Esempio n. 2
0
def drawAllCheckinPlotsByVisitingClassesUsingDemography(model, **conf):
    '''
    For every location that has checkins, plot smoothed per-bin checkin counts
    broken down by user demography (one filled curve per demography, drawn in
    that demography's color) and save one PNG per location, grouped into
    folders named after the location's visiting-probability class.

    model -- project model object; reads model.locationsCheckinsMap and model.userMap
    conf  -- must contain 'plotsFolder', the root folder for the output images
    '''
    plotsFolder = conf['plotsFolder']+'byVisitingClassesUsingDemography/'
    for locationId, location in model.locationsCheckinsMap.iteritems():
        if location['checkins']: 
            locationObject = Location.getObjectFromDict(location['object'])
            # Output path: <plotsFolder>/<visiting-probability class>/<locationId>.png
            plotsFile = '%s%s/%s'%(plotsFolder, Location.getLocationClassBasedOnVisitingProbability(locationObject),locationId+'.png')
            FileIO.createDirectoryForFile(plotsFile)
            # checkinsByBinsAndDemographies[demographyId][bin] -> checkin count
            checkinsByBinsAndDemographies = defaultdict(dict)
            demographColorMap = {}
            for day, binData in location['checkins'].iteritems():
                for bin, checkins in binData.iteritems():
                    bin=int(bin)
                    for user in checkins:
                        demographyId = model.userMap[user]['object']['demography_id']
                        # Remember each demography's plot color (last one seen wins).
                        demographColorMap[demographyId] = model.userMap[user]['object']['demography_color']
                        if bin not in checkinsByBinsAndDemographies[demographyId]: checkinsByBinsAndDemographies[demographyId][bin]=0
                        checkinsByBinsAndDemographies[demographyId][bin]+=1
#            for bin in checkinsByBinsAndDemographies:
#                for demographyId in demographColorMap:
#                    plt.scatter([bin], [checkinsByBinsAndDemographies[bin][demographyId]], color=demographColorMap[demographyId])
            for demographyId, data in checkinsByBinsAndDemographies.iteritems():
#                print smooth([data[k] for k in sorted(data)], 4)
                # Smooth the per-bin series (window 10) and truncate back to the original length.
                plt.fill_between(sorted(data.keys()), smooth([data[k] for k in sorted(data)], 10)[:len(data)], color=demographColorMap[demographyId], alpha=0.65)
#               plt.hist([k for k, v in checkinsByBins.iteritems() for i in range(v)], conf['noOfBinsPerDay'], normed=True)
            plt.title(str(locationObject.visitingProbability))
            plt.savefig(plotsFile)
            plt.clf()
Esempio n. 3
0
 def writeClusterKML():
     '''
     Write a single KML file containing the top feature locations of every user
     cluster of a place, each cluster's points colored with its assigned color.

     NOTE(review): `place` and `noOfFeatures` are not defined in this function;
     presumably they come from an enclosing scope not visible here -- confirm.
     '''
     kml = SpotsKML()
     outputKMLFile='%s/clusters.kml'%placesAnalysisFolder%place['name']
     for data in FileIO.iterateJsonFromFile(placesUserClusterFeaturesFile%place['name']):
         clusterId, color, features = data
         # f[0] is a '_'-separated lattice id; f[2] serves as the point's title.
         kml.addLocationPointsWithTitles([(getLocationFromLid(f[0].replace('_', ' ')), f[2]) for f in features[:noOfFeatures]], color=color)
         FileIO.createDirectoryForFile(outputKMLFile)
         kml.write(outputKMLFile)
Esempio n. 4
0
def writeUserClusterKMLs(place):
    '''Write one KML file per user cluster, containing the cluster's top five locations.'''
    clustering = getUserClustering(place, place.get('k'))
    colorMap = clustering[3]
    clusterDetails = getUserClusteringDetails(place, clustering)
    # Iterate clusters by numeric cluster id.
    for clusterId, details in sorted(clusterDetails.iteritems(), key=lambda item: int(item[0])):
        kml = SpotsKML()
        topLocations = details['locations'][:5]
        pointsWithTitles = [(getLocationFromLid(lid), unicode(name).encode('utf-8')) for lid, name, _ in topLocations]
        kml.addLocationPointsWithTitles(pointsWithTitles, color=colorMap[clusterId])
        outputKMLFile = placesKMLsFolder%place['name'] + 'locations/userClusters/%s/%s.kml'%(str(clustering[0]), str(clusterId))
        FileIO.createDirectoryForFile(outputKMLFile)
        kml.write(outputKMLFile)
Esempio n. 5
0
def getLocationsCheckinDistribution(place):
    '''Plot (log-log) the distribution of total checkin counts across a place's locations.'''
    checkinCounts = {}
    for location in locationToUserMapIterator(place):
        # Total checkins at this location, summed over users, days and day-bins.
        totalCheckins = 0
        for user, userVector in location['users'].iteritems():
            for day, dayVector in userVector.iteritems():
                for db, epochs in dayVector.iteritems():
                    totalCheckins += len(epochs)
        checkinCounts[location['location']] = totalCheckins
    dataX, dataY = getDataDistribution(checkinCounts.values())
    plt.loglog(dataX, dataY)
    outputFile = placesAnalysisFolder%place['name']+'locationsCheckinDistribution.png'
    FileIO.createDirectoryForFile(outputFile)
    plt.savefig(outputFile)
Esempio n. 6
0
def writeARFFFile(place):
    '''
    Build per-user checkin-count vectors over a place's locations (only
    locations with at least 50 checkins), drop users with fewer than
    place['minUserCheckins'] total checkins, write the vectors as an ARFF
    clustering file and move it to its final path.
    '''
    userVectors = defaultdict(dict)
    locationToUserMap = dict((l['location'], l) for l in locationToUserMapIterator(place, minCheckins=50))
    for lid in locationToUserMap:
        for user in locationToUserMap[lid]['users']: 
            # Total checkins of `user` at location `lid`, summed over days and day-bins.
            userVectors[user][lid.replace(' ', '_')]=sum(len(locationToUserMap[lid]['users'][user][d][db]) for d in locationToUserMap[lid]['users'][user] for db in locationToUserMap[lid]['users'][user][d])
    # Iterate a copy of the key list so entries can be deleted while looping.
    for user in userVectors.keys()[:]: 
        if sum(userVectors[user].itervalues())<place['minUserCheckins']: del userVectors[user]
    arffFile=ARFF.writeARFFForClustering(userVectors, place['name'])
    outputFileName = getARFFFileName(place)
    FileIO.createDirectoryForFile(outputFileName)
    GeneralMethods.runCommand('mv %s %s'%(arffFile, outputFileName))
Esempio n. 7
0
 def plotGaussianGraphsForClusters(place):
     '''
     For each location of a place (with cluster details), draw one normal curve
     per cluster -- scaled by the cluster's share of total checkins -- over the
     0-23 hour axis, and save the figure under a folder derived from the
     location's type.
     '''
     for location in Analysis.iterateLocationsWithClusterDetails(place):
         total = location['total']
         clustersInfo = location['clustersInfo']
         for clusterId, data in clustersInfo.iteritems():
             mean, std, clusterSum, color = data['mean'], data['std'], data['clusterSum'], data['color']
             if std!=0: plotNorm(clusterSum/total, mean, std, color=color, label=str(clusterId))
             # A zero std would degenerate the curve; substitute a small random spread.
             else: plotNorm(clusterSum/total, mean, random.uniform(0.1, 0.5), color=color, label=str(clusterId))
         plt.xlim(xmin=0, xmax=23); plt.legend()
         plt.title(location['name'])
         fileName = '/'.join([placesGaussianImagesFolder%place['name'], getLocationType(location), location['location'].replace(' ', '_').replace('.', '+')+'.png'])
         print fileName
         FileIO.createDirectoryForFile(fileName)
         plt.savefig(fileName), plt.clf()
Esempio n. 8
0
 def hashtag_groups_dot_files(association_measure_file=f_fisher_exact_association_measure):
     '''
     Read hashtag-association graphs from a JSON-lines file and write each one
     out as a Graphviz .dot file (one file per input line).

     association_measure_file -- JSON-lines input; each line holds (_, _, edges)
     where each edge is (u, v, attr_dict).
     '''
     output_file_format = fld_google_drive_data_analysis%GeneralMethods.get_method_id()+\
                                                         '/'+association_measure_file.split('/')[-1]+'/%s.dot'
     for line_no, data in\
             enumerate(FileIO.iterateJsonFromFile(association_measure_file, remove_params_dict=True)):
         _, _, edges = data
         graph = nx.Graph()
         for edge in edges: 
             u,v,attr_dict = edge
             # Encode node labels as UTF-8 byte strings for the .dot writer.
             u = unicode(u).encode('utf-8')
             v = unicode(v).encode('utf-8')
             graph.add_edge(u,v, attr_dict)
         output_file = output_file_format%line_no
         print 'Writing file: ', output_file
         FileIO.createDirectoryForFile(output_file)
         nx.write_dot(graph, output_file)
Esempio n. 9
0
def drawAllCheckinPlotsByVisitingClasses(model, **conf):
    '''
    For every location that has checkins, plot the smoothed number of checkins
    per time bin and save one PNG per location, grouped into folders named
    after the location's visiting-probability class.

    model -- project model object; reads model.locationsCheckinsMap
    conf  -- must contain 'plotsFolder', the root folder for the output images
    '''
    plotsFolder = conf['plotsFolder']+'byVisitingClasses/'
    for locationId, location in model.locationsCheckinsMap.iteritems():
        if location['checkins']: 
            locationObject = Location.getObjectFromDict(location['object'])
            # Output path: <plotsFolder>/<visiting-probability class>/<locationId>.png
            plotsFile = '%s%s/%s'%(plotsFolder, Location.getLocationClassBasedOnVisitingProbability(locationObject),locationId+'.png')
            FileIO.createDirectoryForFile(plotsFile)
            checkinsByBins = defaultdict(int)
            for day, binData in location['checkins'].iteritems():
                for bin, checkins in binData.iteritems():
                    checkinsByBins[int(bin)]+=len(checkins)
            # FIX: dict iteration order is arbitrary, so plotting keys() against
            # smooth(values()) produced a scrambled series; sort the bins so the
            # x-axis is monotonic and the smoothing runs over a coherent series.
            sortedBins = sorted(checkinsByBins)
            plt.plot(sortedBins, smooth([checkinsByBins[b] for b in sortedBins], 1))
#            plt.hist([k for k, v in checkinsByBins.iteritems() for i in range(v)], conf['noOfBinsPerDay'], normed=True)
            plt.title(str(locationObject.visitingProbability))
            plt.savefig(plotsFile)
            plt.clf()
Esempio n. 10
0
    def plot_locations_influence_on_world_map(ltuo_model_id_and_hashtag_tag, noOfInfluencers=10, percentage_of_locations=0.15):
        '''
        For each (model_id, hashtag_tag) pair and each hard-coded input city,
        draw on a world map the top fraction of locations that influence that
        city (magenta dots, with great circles to the city's blue dot) and save
        one PNG per city.

        NOTE(review): the noOfInfluencers parameter is not used below -- the
        loader is called with noOfInfluencers=None; confirm this is intended.
        '''
        # (lattice id, human-readable label) pairs for the cities to plot.
        input_locations = [
                               ('40.6000_-73.9500', 'new_york'),
                               ('33.3500_-118.1750', 'los_angeles'),
                               ('29.7250_-97.1500', 'austin'),
                           ('30.4500_-95.7000', 'college_station'),
                            ('-22.4750_-42.7750', 'rio'),
                           ('51.4750_0.0000', 'london'),
                           ('-23.2000_-46.4000', 'sao_paulo')
                         ] 
        for model_id, hashtag_tag in ltuo_model_id_and_hashtag_tag:
            tuo_location_and_tuo_neighbor_location_and_locations_influence_score = \
                    Experiments.load_tuo_location_and_tuo_neighbor_location_and_locations_influence_score(model_id, hashtag_tag, noOfInfluencers=None, influence_type=InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE)
            for input_location, label in input_locations:
                for location, tuo_neighbor_location_and_locations_influence_score in \
                        tuo_location_and_tuo_neighbor_location_and_locations_influence_score:
                    if input_location==location:
                        input_location = getLocationFromLid(input_location.replace('_', ' '))
                        output_file = fld_results%GeneralMethods.get_method_id() + '/%s_%s/%s.png'%(model_id, hashtag_tag, label)
                        # Keep only the top percentage_of_locations influencers
                        # (fall back to all of them if that rounds down to zero).
                        number_of_outgoing_influences = int(len(tuo_neighbor_location_and_locations_influence_score)*percentage_of_locations)
                        if number_of_outgoing_influences==0: number_of_outgoing_influences=len(tuo_neighbor_location_and_locations_influence_score)
                        locations = zip(*tuo_neighbor_location_and_locations_influence_score)[0][:number_of_outgoing_influences]
                        locations = [getLocationFromLid(location.replace('_', ' ')) for location in locations]
#                        locations = filter(lambda location: isWithinBoundingBox(location, PARTIAL_WORLD_BOUNDARY), locations)
                        if locations:
                            _, m = plotPointsOnWorldMap(locations, resolution='c', blueMarble=False, bkcolor='#000000', c='#FF00FF', returnBaseMapObject=True, lw = 0)
#                            _, m = plotPointsOnWorldMap(locations, resolution='c', blueMarble=False, bkcolor='#CFCFCF', c='#FF00FF', returnBaseMapObject=True, lw = 0)
                            for location in locations: 
    #                            if isWithinBoundingBox(location, PARTIAL_WORLD_BOUNDARY): 
                                # Great circle from each influencing location to the input city.
                                m.drawgreatcircle(location[1], location[0], input_location[1], input_location[0], color='#FAA31B', lw=1., alpha=0.5)
#                            plotPointsOnWorldMap([input_location], blueMarble=False, bkcolor='#CFCFCF', c='#003CFF', s=40, lw = 0)
                            plotPointsOnWorldMap([input_location], resolution='c', blueMarble=False, bkcolor='#000000', c='#003CFF', s=40, lw = 0)
#                            plotPointsOnWorldMap([input_location], resolution='c', blueMarble=False, bkcolor='#CFCFCF', c='#003CFF', s=40, lw = 0)
                            FileIO.createDirectoryForFile(output_file)
                            print output_file
                            savefig(output_file)
                            plt.clf()
                        else:
                            GeneralMethods.runCommand('rm -rf %s'%output_file)
                        break
Esempio n. 11
0
def getLocationPlots(place, clusterOVLType, type='scatter'):
    '''
    For each sufficiently-checked-in location of a place, plot the hourly
    checkin distribution of each valid user cluster -- as a scatter plot
    (type='scatter') or as fitted normal curves (type='normal') -- and save one
    image per location.

    NOTE(review): `type` shadows the builtin; `clusterOVLType` is unused in the
    active code path (only referenced in commented-out code).
    '''
    clustering = getUserClustering(place, place.get('k'))
    validClusters = getUserClusteringDetails(place, clustering).keys()
    def scatterPlot(clustering, location, fileName):
        # Plot one series per user cluster for this location and save to fileName.
        userClusterMap = {}
        for clusterId, users in clustering[2]['clusters'].iteritems():
            for user in users: 
                if user in location['users']: userClusterMap[user]=clusterId
        # scatterData[clusterId][hour] -> checkin count for that cluster at that hour
        scatterData = defaultdict(dict)
        clusterMap = clustering[3]
        for user, userVector in location['users'].iteritems():
            if user in userClusterMap:
                for d in userVector:
                    for db in userVector[d]:
                        # Epoch -> hour of day, shifted by -6 hours (presumably a timezone offset -- confirm).
                        for h in [(datetime.datetime.fromtimestamp(ep).hour-6)%24 for ep in userVector[d][db]]:
                            if h not in scatterData[userClusterMap[user]]: scatterData[userClusterMap[user]][h]=0
                            scatterData[userClusterMap[user]][h]+=1
#        total = float(sum([k for cluster, clusterInfo in scatterData.iteritems() for k, v in clusterInfo.iteritems() for i in range(v)]))
        for cluster, clusterInfo in scatterData.iteritems(): 
            if cluster in validClusters: 
                if type=='normal':
                    # Expand counts back into raw hour samples to fit a normal curve.
                    data = [k for k, v in clusterInfo.iteritems() for i in range(v)]
                    mean, std = np.mean(data), np.std(data)
                    if std!=0: plotNorm(sum(data), mean, std, color=clusterMap[cluster])
                    else: plotNorm(sum(data), mean, random.uniform(0.1, 0.5), color=clusterMap[cluster])
                elif type=='scatter': plt.scatter(clusterInfo.keys(), clusterInfo.values(), color=clusterMap[cluster], label=cluster)
        plt.title('%s (%s)'%(location['name'],location['location'])),plt.legend()
#        plt.show()
        plt.xlim(xmin=0,xmax=24)
        plt.savefig(fileName), plt.clf()
#    for clustering in iteraterUserClusterings(place):
    for location in locationToUserMapIterator(place, minCheckins=place['minLocationCheckinsForPlots']): 
#    for location in iterateLocationsByOVLAndClustersType(place, clusterOVLType): 
#        location = location['details']
        print clustering[0], location['location']
        fileName=placesImagesFolder%place['name']+'%s/'%type+str(clustering[0])+'/'+ location['location'].replace(' ', '_').replace('.', '+')+'.png'
        FileIO.createDirectoryForFile(fileName)
        scatterPlot(clustering, location, fileName)
Esempio n. 12
0
 def plot_maps_for_every_hour():
     '''
     For two hard-coded hashtags, build per-15-minute lag buckets of occurrence
     locations over the first 24 hours of each hashtag's highest active region,
     then for every lag render a two-subplot world map of the accumulated
     locations (colored by recency of the latest occurrence) and save one PNG
     per lag under ./images/plot_maps_for_every_hour/.
     '''
     MINUTES = 15
     hashtags = ['ripstevejobs', 'cnbcdebate']
     map_from_hashtag_to_subplot = dict([('ripstevejobs', 211), ('cnbcdebate', 212)])
     # epoch lag -> hashtag -> [(lattice id, occurrence lag in seconds), ...]
     map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag = defaultdict(dict)
     for hashtag in hashtags:
         for hashtag_object in FileIO.iterateJsonFromFile('./data/%s.json'%hashtag):
             map_from_epoch_time_unit_to_tuples_of_location_and_epoch_occurrence_time =  getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtag_object), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
             tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time = sorted(map_from_epoch_time_unit_to_tuples_of_location_and_epoch_occurrence_time.iteritems(), key=itemgetter(0))
             epoch_starting_time_unit = tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time[0][0]
             # Only consider the first 24 hours after the first time unit.
             epoch_ending_time_unit = epoch_starting_time_unit+24*60*60
             for epoch_time_unit, tuples_of_location_and_epoch_occurrence_time in tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time:
                 if epoch_time_unit<=epoch_ending_time_unit:
                     if tuples_of_location_and_epoch_occurrence_time:
                         epoch_lag = epoch_time_unit - epoch_starting_time_unit
                         tuples_of_location_and_epoch_occurrence_time = sorted(tuples_of_location_and_epoch_occurrence_time, key=itemgetter(1))
                         map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag[epoch_lag][hashtag] = [(getLatticeLid(location, 0.145), epoch_occurrence_time-epoch_starting_time_unit)for location, epoch_occurrence_time in tuples_of_location_and_epoch_occurrence_time]
     map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag = defaultdict(list)
     GeneralMethods.runCommand('rm -rf ./images/plot_maps_for_every_hour/')
     for epoch_lag in sorted(map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag):
         file_world_map_plot = './images/plot_maps_for_every_hour/%s.png'%(epoch_lag)
         print file_world_map_plot
         map_from_hashtag_to_tuples_of_location_and_epoch_lag = map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag[epoch_lag]
         # Accumulate occurrences seen up to (and including) this lag.
         for hashtag, tuples_of_location_and_epoch_lag in map_from_hashtag_to_tuples_of_location_and_epoch_lag.iteritems():
             map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag[hashtag]+=tuples_of_location_and_epoch_lag
         for hashtag, accumulated_tuples_of_location_and_epoch_lag in map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag.iteritems():
             plt.subplot(map_from_hashtag_to_subplot[hashtag])
             # Per location, keep only the most recent (max) occurrence lag.
             tuples_of_location_and_epoch_max_lag= [(location, max(zip(*iterator_of_tuples_of_location_and_epoch_lag)[1]))
                                for location, iterator_of_tuples_of_location_and_epoch_lag in 
                                groupby(sorted(accumulated_tuples_of_location_and_epoch_lag, key=itemgetter(0)), key=itemgetter(0))
                             ]
             locations, colors = zip(*[(getLocationFromLid(location.replace('_', ' ')), (epoch_lag+MINUTES*60)-epoch_max_lag) for location, epoch_max_lag in sorted(tuples_of_location_and_epoch_max_lag, key=itemgetter(1), reverse=True)])
             plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap=matplotlib.cm.cool, lw = 0, vmax=epoch_lag+MINUTES*60)
             plt.title('%s      (%s hours)'%(hashtag, (epoch_lag+MINUTES*60)/(60.*60)))
 #        plt.show()
         FileIO.createDirectoryForFile(file_world_map_plot)
         plt.savefig(file_world_map_plot)
         plt.clf()
Esempio n. 13
0
 def utm_object_analysis():
     '''
     For every valid UTM grid cell, compute its number of valid neighboring
     cells and the mean number of hashtags it shares with them, then save the
     result as an R data frame via rpy2 (.df file).
     '''
     ltuo_utm_id_and_num_of_neighbors_and_mean_common_h_count = []
     output_file = fld_google_drive_data_analysis%GeneralMethods.get_method_id()+'.df'
     so_valid_utm_ids = set()
     # First pass: collect the set of valid utm ids present in the input.
     for utm_object in FileIO.iterateJsonFromFile(f_hashtags_by_utm_id, True): 
         so_valid_utm_ids.add(utm_object['utm_id'])
     # Second pass: per cell, count valid neighbors and average shared-hashtag counts.
     for utm_object in FileIO.iterateJsonFromFile(f_hashtags_by_utm_id, True):
         so_valid_nei_utm_ids = set(utm_object['mf_nei_utm_id_to_common_h_count']).intersection(so_valid_utm_ids)
         mean_num_of_common_h_count = np.mean([utm_object['mf_nei_utm_id_to_common_h_count'][nei_utm_id] 
                                            for nei_utm_id in so_valid_nei_utm_ids])
         ltuo_utm_id_and_num_of_neighbors_and_mean_common_h_count.append([utm_object['utm_id'], 
                                                                          len(so_valid_nei_utm_ids),
                                                                          mean_num_of_common_h_count])
     utm_ids, num_of_neighbors, mean_common_h_count = zip(*ltuo_utm_id_and_num_of_neighbors_and_mean_common_h_count)
     od = rlc.OrdDict([
                       ('utm_ids', robjects.StrVector(utm_ids)),
                       ('num_of_neighbors', robjects.FloatVector(num_of_neighbors)),
                       ('mean_common_h_count', robjects.FloatVector(mean_common_h_count))
                     ])
     df = robjects.DataFrame(od)
     FileIO.createDirectoryForFile(output_file)
     print 'Saving df to: ', output_file
     df.to_csvfile(output_file)
Esempio n. 14
0
    def locations_at_top_and_bottom(model_ids, no_of_locations=5):
        '''
        For each model and each hard-coded input location, write to a JSON file
        the no_of_locations neighbor locations with the lowest ("Bottom:") and
        highest ("Top:") pure influence scores.

        NOTE(review): head of the list is treated as bottom and tail as top, so
        the loader's scores are assumed sorted ascending -- confirm.
        '''
        for model_id in model_ids:
            output_file_format = analysis_folder+'%s/'%(GeneralMethods.get_method_id())+'%s/%s.json'
            input_locations = [ 
#                                ('40.6000_-73.9500', 'new_york'), 
                                ('30.4500_-95.7000', 'college_station'), 
                            ] 
            tuo_location_and_tuo_neighbor_location_and_influence_score = \
                Experiments.load_tuo_location_and_tuo_neighbor_location_and_pure_influence_score(model_id)
            for input_location, label in input_locations:
                for location, tuo_neighbor_location_and_influence_score in \
                        tuo_location_and_tuo_neighbor_location_and_influence_score:
                    if input_location==location:
                        output_file = output_file_format%(input_location, model_id)
                        # Start from a fresh file each run.
                        GeneralMethods.runCommand('rm -rf %s'%output_file)
                        FileIO.createDirectoryForFile(output_file)
                        FileIO.writeToFileAsJson("Bottom:", output_file)
                        for neighbor_location_and_influence_score in tuo_neighbor_location_and_influence_score[:no_of_locations]:
                            FileIO.writeToFileAsJson(neighbor_location_and_influence_score+[''], output_file)
                        FileIO.writeToFileAsJson("Top:", output_file)
                        for neighbor_location_and_influence_score in \
                                reversed(tuo_neighbor_location_and_influence_score[-no_of_locations:]):
                            FileIO.writeToFileAsJson(neighbor_location_and_influence_score+[''], output_file)
Esempio n. 15
0
def plotHastagClasses(timeRange, folderType):
    '''
    For every hashtag object, render a three-subplot figure: (311) occurrences
    on a world map colored by per-lattice counts, (312) time series of the
    high-activity regions, (313) the full occurrence time series, and save one
    image per hashtag grouped by classifier class.

    NOTE(review): the iterated file is hard-coded to 'testing_world'/(2, 11)
    instead of using timeRange/folderType, and classId is overwritten with 1
    right after classification -- both look like leftover debugging shortcuts;
    confirm before relying on the class folders.
    '''
    def getFileName():
        # Yields two-letter png names: 'ab.png', 'ac.png', ... (used by commented-out code below).
        for i in combinations('abcedfghijklmnopqrstuvwxyz',2): yield ''.join(i)+'.png'
    count=1
#    for hashtagObject in FileIO.iterateJsonFromFile(hashtagsWithoutEndingWindowFile%(folderType,'%s_%s'%timeRange)):
    for hashtagObject in FileIO.iterateJsonFromFile(hashtagsFile%('testing_world','%s_%s'%(2,11))):
#        HashtagsClassifier.classify(hashtagObject)
        print count; count+=1
#        if hashtagObject['h']=='ripamy':
        classId = HashtagsClassifier.classify(hashtagObject)
        if classId!=None:
            classId = 1
            outputFile = hashtagsImagesHashtagsClassFolder%folderType+'%s/%s.png'%(classId, hashtagObject['h']); FileIO.createDirectoryForFile(outputFile)
            fileNameIterator = getFileName()
            timeUnits, timeSeries = getTimeUnitsAndTimeSeries(hashtagObject['oc'], timeUnit=HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)
            # List of (occurrences, plot color); currently only the highest active region in magenta.
            occurancesInActivityRegions = [[getOccuranesInHighestActiveRegion(hashtagObject), 'm']]
#            for hashtagPropagatingRegion in HashtagsClassifier._getActivityRegionsWithActivityAboveThreshold(hashtagObject):
#                validTimeUnits = [timeUnits[i] for i in range(hashtagPropagatingRegion[0], hashtagPropagatingRegion[1]+1)]
#                occurancesInActiveRegion = [(p,t) for p,t in hashtagObject['oc'] if GeneralMethods.approximateEpoch(t, TIME_UNIT_IN_SECONDS) in validTimeUnits]
#                occurancesInActivityRegions.append([occurancesInActiveRegion, GeneralMethods.getRandomColor()])
            
            currentMainRangeId = 0
            for occurances1, color1 in occurancesInActivityRegions:
#                outputFile=outputFolder+fileNameIterator.next();FileIO.createDirectoryForFile(outputFile)
                print outputFile
                # Subplot 312: time series of each activity region (current region forced magenta).
                ax = plt.subplot(312)
                subRangeId = 0
                for occurances, color in occurancesInActivityRegions:
                    if subRangeId==currentMainRangeId: color='m'
                    timeUnits, timeSeries = getTimeUnitsAndTimeSeries(occurances, timeUnit=HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)
#                    if len(timeUnits)<24: 
#                        difference = 24-len(timeUnits)
#                        timeUnits=list(timeUnits)+[timeUnits[-1]+(i+1)*HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS for i in range(difference)]
#                        timeSeries=list(timeSeries)+[0 for i in range(difference)]
#                    print len(timeUnits[:24]), len(timeSeries[:24])
                    plt.plot_date([datetime.datetime.fromtimestamp(t) for t in timeUnits], timeSeries, '-o', c=color)
                    subRangeId+=1
#                plt.ylim(ymax=1)
                plt.setp(ax.get_xticklabels(), rotation=10, fontsize=7)
            
                # Subplot 313: full occurrence time series with region overlays.
                ax=plt.subplot(313)
                subRangeId = 0
                timeUnits, timeSeries = getTimeUnitsAndTimeSeries(hashtagObject['oc'], timeUnit=HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)
                plt.plot_date([datetime.datetime.fromtimestamp(t) for t in timeUnits], timeSeries, '-')
                for occurances, color in occurancesInActivityRegions:
                    if subRangeId==currentMainRangeId: color='m'
                    timeUnits, timeSeries = getTimeUnitsAndTimeSeries(occurances, timeUnit=HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)
                    plt.plot_date([datetime.datetime.fromtimestamp(t) for t in timeUnits], timeSeries, '-o', c=color)
                    subRangeId+=1
                plt.setp(ax.get_xticklabels(), rotation=10, fontsize=7)
                
                # Subplot 311: world map of occurrences, grouped and colored by lattice counts.
                plt.subplot(311)
                occurancesGroupedByLattice = sorted(
                                                    [(getLocationFromLid(lid.replace('_', ' ')), len(list(occs))) for lid, occs in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in occurances1], key=itemgetter(0)), key=itemgetter(0))],
                                                    key=itemgetter(1)
                                                    )
                points, colors = zip(*occurancesGroupedByLattice)
                cm = matplotlib.cm.get_cmap('cool')
                if len(points)>1: 
                    sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw=0, alpha=1.0)
                    plt.colorbar(sc)
                else: sc = plotPointsOnWorldMap(points, c='m', lw=0)
                plt.title(hashtagObject['h']+ '(%d)'%len(occurancesGroupedByLattice))
#                plt.show()
                # Best effort: some titles/paths can make savefig fail; skip those figures.
                try:
                    plt.savefig(outputFile); plt.clf()
                except: pass
                currentMainRangeId+=1
Esempio n. 16
0
    def temp():
        '''
        Debug helper: for the 'americanhorrorstory' hashtag, plot the locations
        of its occurrences on a world map for each hourly interval of its
        highest active region, save one PNG per interval, then exit().
        '''
        hashtags, MINUTES = [], 60
        for hashtagObject in FileIO.iterateJsonFromFile('americanhorrorstory'):
            if hashtagObject['h']=='americanhorrorstory':
                print unicode(hashtagObject['h']).encode('utf-8'), len(hashtagObject['oc'])
                occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject, timeUnit=60*60), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
                totalOccurances = []
                for interval, t in enumerate(sorted(occsDistributionInTimeUnits)):
                    occs = occsDistributionInTimeUnits[t]
                    if occs:
                        fileName = '../images/plotsOnMap/%s/%s.png'%(hashtagObject['h'], (interval+1)*MINUTES); FileIO.createDirectoryForFile(fileName)
#                        print interval, t, len(occs)
                        print fileName
                        # One magenta point per distinct lattice that occurred in this interval.
                        occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), 'm') for lid, occ in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in occs], key=itemgetter(0)), key=itemgetter(0))]
                        occurancesGroupedByLattice = sorted(occurancesGroupedByLattice, key=itemgetter(1))
                        points, colors = zip(*occurancesGroupedByLattice)
                        plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0)
#                        plt.show()
                        plt.savefig(fileName)
                        plt.clf()
                exit()
Esempio n. 17
0
 def plotGraphsForHashtag(hashtag):
     '''
     For the given hashtag, show on a world map the cumulative set of lattice
     locations that have mentioned it, one frame per 5-minute interval of the
     hashtag's highest active region (stops after 120 minutes).

     NOTE(review): plt.show() is active and plt.savefig is commented out, so
     frames are displayed interactively rather than written to fileName.
     '''
     for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'):
         MINUTES = 5
         if hashtagObject['h']==hashtag:
             print unicode(hashtagObject['h']).encode('utf-8'), len(hashtagObject['oc'])
             occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
             totalOccurances = []
             for interval, t in enumerate(sorted(occsDistributionInTimeUnits)):
                 occs = occsDistributionInTimeUnits[t]
                 # Accumulate occurrences so each frame shows everything seen so far.
                 totalOccurances+=occs
                 if occs:
                     fileName = '../images/plotsOnMap/%s/%s.png'%(hashtagObject['h'], (interval+1)*MINUTES); FileIO.createDirectoryForFile(fileName)
                     print fileName
                     occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), 'm') for lid, occs in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in totalOccurances], key=itemgetter(0)), key=itemgetter(0))]
                     occurancesGroupedByLattice = sorted(occurancesGroupedByLattice, key=itemgetter(1))
                     points, colors = zip(*occurancesGroupedByLattice)
                     plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0)
                     plt.show()
 #                    plt.savefig(fileName)
                     plt.clf()
                 if (interval+1)*MINUTES>=120: break
             break
Esempio n. 18
0
 def getSimulationFile(self): 
     '''Return the simulation data file path for this model, ensuring its directory exists.'''
     # Path: <simulationDataFolder>/<modelType>/<days>_<binsPerDay>_<areas>
     simulationPath = self.conf['simulationDataFolder'] + '%s/%s_%s_%s'%(self.modelType, self.conf['noOfDaysOfSimulation'], self.conf['noOfBinsPerDay'], self.conf['noOfAreas'])
     FileIO.createDirectoryForFile(simulationPath)
     return simulationPath
Esempio n. 19
0
 def writeARFFFile(place):
     '''Generate an ARFF clustering file of user vectors for a place and move it to its final path.'''
     vectorsByUser = GenerateDataFiles.getUserVectors(place)
     generatedARFFPath = ARFF.writeARFFForClustering(vectorsByUser, place['name'])
     finalPath = placesARFFFile%place['name']
     FileIO.createDirectoryForFile(finalPath)
     GeneralMethods.runCommand('mv %s %s'%(generatedARFFPath, finalPath))
Esempio n. 20
0
def plotHashtagsInOutGraphs(timeRange, outputFolder):
    '''
    For every location in the in/out temporal-closeness graph, plot the
    locations it gets hashtags from (top subplot) and sends hashtags to
    (bottom subplot) on world maps, and save one PNG per location (skipping
    locations whose image already exists).
    '''
    def plotPoints(links, xlabel):
        # Scatter the linked locations colored by closeness score, plus the
        # current location itself as a black dot.
        cm = matplotlib.cm.get_cmap('cool')
        points, colors = zip(*sorted([(getLocationFromLid(k.replace('_', ' ')), v)for k, v in links.iteritems()], key=itemgetter(1)))
        sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw=0, vmin=0, vmax=1)
        plotPointsOnWorldMap([getLocationFromLid(locationObject['id'].replace('_', ' '))], c='k', s=20, lw=0)
        plt.xlabel(xlabel), plt.colorbar(sc)
    counter=1
    for locationObject in FileIO.iterateJsonFromFile(hashtagLocationInAndOutTemporalClosenessGraphFile%(outputFolder, '%s_%s'%timeRange)): 
        point = getLocationFromLid(locationObject['id'].replace('_', ' '))
        # Image named after the (lon, lat)-ordered lattice id of this location.
        outputFile = hashtagsImagesLocationInfluencersFolder+'%s.png'%getLatticeLid([point[1], point[0]], ACCURACY); FileIO.createDirectoryForFile(outputFile)
        print counter;counter+=1
        if not os.path.exists(outputFile):
            if locationObject['in_link'] and locationObject['out_link']:
                print outputFile
                plt.subplot(211)
                plt.title(locationObject['id'])
                plotPoints(locationObject['in_link'], 'Gets hashtags from these locations')
                plt.subplot(212)
                plotPoints(locationObject['out_link'], 'Sends hashtags to these locations')
#                plt.show()
                plt.savefig(outputFile); plt.clf()
Esempio n. 21
0
    def plotTimeSeries(hashtagObject):
        '''
        Plot the hashtag's full occurrence time series with its highest active
        region highlighted (top subplot) and the active region alone (bottom
        subplot), then save the figure. Only hashtags whose highest active
        region is the first one and whose source lattice has at least
        MIN_OCCURRENCES_TO_DETERMINE_SOURCE_LATTICE occurrences are plotted.

        NOTE(review): inside the `if isFirstActiveRegion and ...` branch the
        `if not isFirstActiveRegion:` arm is unreachable dead code.
        '''
        def getDataToPlot(occ):
            # Return (timeUnits, counts) with gaps zero-filled at TIME_UNIT_IN_SECONDS resolution.
            occurranceDistributionInEpochs = getOccurranceDistributionInEpochs(occ)
            startEpoch, endEpoch = min(occurranceDistributionInEpochs, key=itemgetter(0))[0], max(occurranceDistributionInEpochs, key=itemgetter(0))[0]
            dataX = range(startEpoch, endEpoch, TIME_UNIT_IN_SECONDS)
            occurranceDistributionInEpochs = dict(occurranceDistributionInEpochs)
            for x in dataX: 
                if x not in occurranceDistributionInEpochs: occurranceDistributionInEpochs[x]=0
            return zip(*sorted(occurranceDistributionInEpochs.iteritems(), key=itemgetter(0)))
        
        outputFile = hashtagsImagesFirstActiveTimeSeriesAnalysisFolder%outputFolder+'%s.png'%(hashtagObject['h']); FileIO.createDirectoryForFile(outputFile)
        print unicode(outputFile).encode('utf-8')
        
        timeUnits, timeSeries = getDataToPlot(hashtagObject['oc'])
        occurencesInActiveRegion, isFirstActiveRegion = getOccuranesInHighestActiveRegion(hashtagObject, True)
        timeUnitsForActiveRegion, timeSeriesForActiveRegion = getDataToPlot(occurencesInActiveRegion)
        lid, count = getSourceLattice(hashtagObject['oc'])
        if isFirstActiveRegion and count>=MIN_OCCURRENCES_TO_DETERMINE_SOURCE_LATTICE: 
            ax=plt.subplot(211)
            plt.plot_date(map(datetime.datetime.fromtimestamp, timeUnits), timeSeries, '-')
            if not isFirstActiveRegion: plt.plot_date(map(datetime.datetime.fromtimestamp, timeUnitsForActiveRegion), timeSeriesForActiveRegion, 'o', c='r')
            else: plt.plot_date(map(datetime.datetime.fromtimestamp, timeUnitsForActiveRegion), timeSeriesForActiveRegion, 'o', c='k')
            plt.setp(ax.get_xticklabels(), rotation=30, fontsize=10)
            plt.title(hashtagObject['h'] + '(%s)'%count)
            ax=plt.subplot(212)
            plt.plot_date(map(datetime.datetime.fromtimestamp, timeUnitsForActiveRegion), timeSeriesForActiveRegion, '-')
            plt.setp(ax.get_xticklabels(), rotation=30, fontsize=10)
    #        if isFirstActiveRegion:
    #            lid, count = getSourceLattice(hashtagObject['oc'])
    #            if count>=MIN_OCCURRENCES_TO_DETERMINE_SOURCE_LATTICE:
    #                print lid, count
#            plt.show()
            plt.savefig(outputFile); 
            plt.clf()
Esempio n. 22
0
 def plotSharingProbabilityAndTemporalClosenessScoresOnMap(timeRange, outputFolder):
     i = 1
     for latticeObject in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%(outputFolder,'%s_%s'%timeRange)):
         latticePoint = getLocationFromLid(latticeObject['id'].replace('_', ' '))
         latticeId = getLatticeLid([latticePoint[1], latticePoint[0]], LATTICE_ACCURACY)
         plt.subplot(211)
         plt.title(latticeId)
         LatticeGraphPlots.plotLatticeSharingProbabilityOnMap(LatticeGraph.typeSharingProbability, latticeObject)
         plt.subplot(212)
         LatticeGraphPlots.plotLatticeTemporalClosenessScoresOnMap(LatticeGraph.typeTemporalCloseness, latticeObject)
         plt.show()
         outputFile = hashtagsImagesGraphAnalysisFolder%outputFolder+'%s_and_%s/%s.png'%(LatticeGraph.typeSharingProbability['id'], LatticeGraph.typeTemporalCloseness['id'], latticeId); FileIO.createDirectoryForFile(outputFile)
         print i, outputFile; i+=1