def retreive(self):
    """Append tweets inside the bounding box and time range to ``self.tweets``.

    Every element of ``self.TweetIter`` is indexed with the keys
    ``created_at`` (parsed with ``"%a %b %d %H:%M:%S +0000 %Y"``), ``lat``,
    ``lon``, ``screen_name`` and ``text``.

    ``self.bbox`` is read as ``bbox[0] = (upper latitude, lower longitude)``
    and ``bbox[1] = (lower latitude, upper longitude)``; ``self.timerange``
    is a ``(start, end)`` pair compared against ``time.struct_time`` values.
    Both bounds of each range are inclusive.

    Iteration stops at the first tweet newer than ``timerange[1]``, so the
    source is presumably ordered by creation time (not verifiable from here).

    Returns:
        ``self.tweets``, the same list object the matches were appended to.
    """
    for tw in self.TweetIter:
        created = time.strptime(tw['created_at'], "%a %b %d %H:%M:%S +0000 %Y")

        # Check the upper bound *before* appending: previously the first
        # tweet past the end of the range was still added to the result.
        if created > self.timerange[1]:
            break

        inside_box = (self.bbox[1][0] <= tw['lat'] <= self.bbox[0][0]
                      and self.bbox[0][1] <= tw['lon'] <= self.bbox[1][1])
        if inside_box and created >= self.timerange[0]:
            self.tweets.append(
                '@' + tw['screen_name']
                + '\t _tweeted_ \t' + tw['text']
                + '\t at_time \t' + gmt_to_local(created, make_string=True)
            )
    return self.tweets
def retreive(self):
    """Append tweets that mention any of ``self.keywords`` to ``self.tweets``.

    The shape of the items in ``self.TweetIter`` depends on the instance:

    * when ``self.timerange`` or ``self.bbox`` is set, each item is a string
      ``"@<screen_name>\\t _tweeted_ \\t<text>\\t at_time \\t<created_at>"``;
    * otherwise each item is indexed with the keys ``screen_name``, ``text``
      and ``created_at`` (a ``"%a %b %d %H:%M:%S +0000 %Y"`` timestamp that
      is converted with ``gmt_to_local``).

    A tweet matches when at least one alphanumeric token of its text, other
    than the placeholder tokens listed below, is in ``self.keywords``.

    Unlike the previous implementation, the input mappings are not modified.

    Returns:
        ``self.tweets``, the same list object the matches were appended to.
    """
    tweeted_sep = '\t _tweeted_ \t'
    time_sep = '\t at_time \t'
    # Tokens excluded from matching; presumably placeholders emitted by
    # self.tokenize (confirm against the tokenizer).
    placeholders = ('USERNAME', 'URL', 'PHONENUMBER', 'TIME', 'NUMBER')
    preformatted = self.timerange is not None or self.bbox is not None

    for tw in self.TweetIter:
        if preformatted:
            head_and_rest = tw.split(tweeted_sep)
            text_and_time = head_and_rest[1].split(time_sep)
            # The serialized form already carries the '@' prefix; strip it
            # so the line emitted below does not start with '@@'.
            screen_name = head_and_rest[0].lstrip('@')
            text = text_and_time[0]
            created_at = text_and_time[1]
        else:
            screen_name = tw['screen_name']
            text = tw['text']
            created_at = gmt_to_local(
                time.strptime(tw['created_at'], "%a %b %d %H:%M:%S +0000 %Y"),
                make_string=True,
            )

        words_in_tweet = [
            token for token in self.tokenize(text)
            if token.isalnum() and token not in placeholders
        ]
        if any(word in self.keywords for word in words_in_tweet):
            self.tweets.append(
                '@' + screen_name + tweeted_sep + text + time_sep + created_at
            )
    return self.tweets
def __iter__(self):
    """Yield one snapshot dict per time window of tweets inside ``self.Grid``.

    Items of ``self.ObjIter`` are indexed with the keys ``lon``, ``lat``,
    ``place``, ``text``, ``user_id``, ``created_at`` and ``screen_name``.
    Items whose ``lon`` is 0 get coordinates from ``GetGeocode(place)``;
    items still at ``lon == 0`` or outside
    ``Grid = [lon_min, lat_min, lon_max, lat_max]`` are skipped.

    A new window starts when a tweet is older than ``self.time_start`` or
    more than ``self.timeWindow`` seconds newer; ``timeWindow == -1``
    disables windowing. When ``self.UsersUnique`` is true, only the first
    tweet of each ``user_id`` within a window is kept.

    The window state (``time_start``, ``tw``, ``UniqueUids``) lives on the
    instance and is updated while iterating.

    Yields:
        dict with keys ``LOC``, ``TEXT``, ``PLACE``, ``CREATED_AT``,
        ``SCREEN_NAME`` (parallel lists) and ``TimeWindow``
        (``[start, end]`` formatted with ``'%d%b%HHR%MMN'``).
    """
    def local_stamp(gmt_struct):
        # Shift by the current local-vs-GMT offset before formatting.
        utc_offset = time.mktime(time.localtime()) - time.mktime(time.gmtime())
        return time.strftime(
            '%d%b%HHR%MMN',
            time.localtime(time.mktime(gmt_struct) + utc_offset),
        )

    def snapshot(window_end):
        # Package the window currently held in self.tw.
        return {
            'LOC': self.tw['LOC'],
            'TEXT': self.tw['TEXT'],
            'TimeWindow': [local_stamp(self.time_start), local_stamp(window_end)],
            'PLACE': self.tw['PLACE'],
            'CREATED_AT': self.tw['CREATED_AT'],
            'SCREEN_NAME': self.tw['SCREEN_NAME'],
        }

    # Timestamp of the last accepted tweet. Starts at the window start so the
    # final flush has a defined end even when no item is accepted in this
    # pass (previously that case raised UnboundLocalError on TIME).
    last_seen = self.time_start

    for item in self.ObjIter:
        # Tweets with no GPS are assigned place_name geocodes
        if item['lon'] == 0:
            item['lon'], item['lat'] = GetGeocode(item['place'])

        # Block all tweets outside place grid
        if item['lon'] == 0 or not (
                self.Grid[0] <= item['lon'] <= self.Grid[2]
                and self.Grid[1] <= item['lat'] <= self.Grid[3]):
            continue

        # Unfold tweet into its item variables
        TEXT = item['text']
        UID = item['user_id']
        PLACE = item['place']
        LOC = (float(item['lat']), float(item['lon']))
        TIME = time.strptime(item['created_at'], "%a %b %d %H:%M:%S +0000 %Y")
        CREATED_AT = gmt_to_local(TIME, make_string=True)
        SCREEN_NAME = item['screen_name']
        last_seen = TIME

        # TimeWindow update
        shiftWindow = (
            (TIME < self.time_start)
            or (time.mktime(TIME) - time.mktime(self.time_start) > self.timeWindow)
        ) and self.timeWindow != -1

        if shiftWindow:
            # Emit the finished window (if it captured anything); the tweet
            # that triggered the shift supplies the end timestamp.
            if self.tw['LOC']:
                yield snapshot(TIME)
            # Start a fresh window. The lists are rebound, not cleared, so a
            # snapshot already handed to the consumer is left untouched.
            self.time_start = TIME
            self.UniqueUids = []
            self.tw['LOC'] = []
            self.tw['TEXT'] = []
            self.tw['PLACE'] = []
            self.tw['CREATED_AT'] = []
            self.tw['SCREEN_NAME'] = []

        # Check if UID has tweeted in this timeWindow before
        first_from_user = UID not in self.UniqueUids and self.UsersUnique
        if first_from_user:
            self.UniqueUids += [UID]

        # Write the tweet's values to the window
        if first_from_user or not self.UsersUnique:
            self.tw['LOC'].append(LOC)
            self.tw['TEXT'].append(TEXT)
            self.tw['PLACE'].append(PLACE)
            self.tw['CREATED_AT'].append(CREATED_AT)
            self.tw['SCREEN_NAME'].append(SCREEN_NAME)

    # Flush whatever is left in the last window.
    if self.tw['LOC']:
        yield snapshot(last_seen)
def print_vocabulary_report(db,scale=60*20,**kwargs):
    """Plot a hot-word report for the tweets in ``db`` and save it as a PNG.

    Tweets are read through ``TweetSnap(db=db, timeWindow=scale,
    Placename2Geocode=False)`` one snapshot at a time. The figure has four
    panels: tweet volume per window, a heat map of each hot word's share per
    window, total counts per hot word, and a text summary.

    Args:
        db: passed through to ``TweetSnap``.
        scale: time-window length handed to ``TweetSnap`` (shown in the
            report as seconds).
        **kwargs:
            TIME_START, TIME_END: ``time.struct_time`` or a string in
                ``"%d %b %H:%M %Z %Y"`` format. Default to the epoch and
                to the current time.
            HotWordSize: number of most common words kept per window
                (default 8).

    Raises:
        ValueError: if no hot word was collected, so there is nothing to
            plot. Raised before any figure is created.

    Side effects:
        Prints progress messages and writes
        ``'<start>_to_<end>.png'`` in the current directory.
    """
    print("COLLECTING TWEETS....")
    TS = TweetSnap(db=db, timeWindow=scale, Placename2Geocode=False)
    print("COLLECTION OVER....")

    TIME_START = kwargs.get("TIME_START", time.gmtime(0))
    TIME_END = kwargs.get("TIME_END", time.gmtime(time.time()))
    HotWordSize = kwargs.get("HotWordSize", 8)

    if isinstance(TIME_START, str):
        TIME_START = time.gmtime(time.mktime(time.strptime(TIME_START, "%d %b %H:%M %Z %Y")))
    if isinstance(TIME_END, str):
        TIME_END = time.gmtime(time.mktime(time.strptime(TIME_END, "%d %b %H:%M %Z %Y")))

    # Skip ahead when the requested start is later than the first snapshot.
    TIME_DIFF = time.mktime(TIME_START) - time.mktime(TS.time_start)
    if TIME_DIFF > 0:
        TS.move_on(TIME_DIFF - scale)

    volume = []            # tweets per window
    TimeList = []          # window labels, in order
    WindowShares = []      # per window: {word: share of that window's hot-word count}
    TweetCountDict = {}    # word -> count summed over all windows

    while TS.time_start < TIME_END and not TS.end:
        # Capture nextSnap and initialize time_start of next snap
        snap = TS.next()
        timeWindow = gmt_to_local(TS.time_start, make_string=True, format='%a %H:%M')

        # Volume of tweets
        volume.append(len(snap['LOC']))

        # HotWords of this window
        Vocab_dict = dict(get_vocabulary(snap['TEXT']).most_common(HotWordSize))
        TimeList.append(timeWindow)
        window_total = float(sum(Vocab_dict.values()))
        shares = {}
        for word, count in Vocab_dict.items():
            shares[word] = count / window_total
            TweetCountDict[word] = TweetCountDict.get(word, 0) + count
        # Stored by position rather than by label: '%a %H:%M' labels repeat
        # once the range exceeds a week and would overwrite each other.
        WindowShares.append(shares)
        print("LOOPING2")

    if not TweetCountDict:
        # The array reductions below fail on empty data anyway; fail early
        # with a readable message and without leaving a figure open.
        raise ValueError("no hot words collected between TIME_START and TIME_END; nothing to plot")

    SortedTweetCount = sorted(TweetCountDict.items(), key=operator.itemgetter(1))
    WordList = [word for word, _ in SortedTweetCount]
    TweetCountArray = np.array([count for _, count in SortedTweetCount], dtype=int)
    del SortedTweetCount

    ColorMap = np.zeros([len(WordList), len(TimeList)], dtype=float)
    for rw, word in enumerate(WordList):
        for cl, shares in enumerate(WindowShares):
            ColorMap[rw][cl] = shares.get(word, 0)

    # One tick every 5th window; labels must be sampled with the same step,
    # otherwise tick k is labelled with window k instead of window 5*k.
    TickPositions = np.arange(0, len(TimeList), 5)
    TickLabels = TimeList[::5]

    ###PRINT RESULTS
    gs = gridspec.GridSpec(2, 2, width_ratios=[1, 2], height_ratios=[1, 4])
    gs.update(left=0.05, right=0.48,
              wspace=0.00000000000000000000000000000000000000005,
              hspace=0.00000000000000000000000000000000000000005)
    fig1 = plt.figure(figsize=(36, 90), dpi=200)
    try:
        ax0 = fig1.add_subplot(gs[0, 1])
        ax1 = fig1.add_subplot(gs[1, 1])
        ax2 = fig1.add_subplot(gs[1, 0])
        ax3 = fig1.add_subplot(gs[0, 0])

        #TweetVolume
        ax0.grid(True, 'major', color='w', linestyle='-', linewidth=0.7)
        ax0.grid(True, 'minor', color='0.92', linestyle='-', linewidth=0.35)
        # NOTE(review): set_axis_bgcolor is deprecated since matplotlib 2.0
        # in favour of set_facecolor; switch once the minimum supported
        # matplotlib version is known.
        ax0.set_axis_bgcolor('0.95')
        ax0.plot(np.arange(len(TimeList)), volume, label='NumberOfTweets', linewidth=0.75)
        ax0.legend(loc='upper left', ncol=4)
        ax0.set_xlim(0, len(TimeList) - 1)
        ax0.xaxis.tick_top()
        ax0.yaxis.tick_right()
        ax0.set_xticks(TickPositions)
        ax0.set_xticklabels(TickLabels, rotation='vertical')

        #HotWordColorMap
        ax1.imshow(ColorMap, cmap=plt.cm.binary, vmin=ColorMap.min(), vmax=ColorMap.max(),
                   aspect='auto', origin='lower')
        ax1.yaxis.tick_right()
        ax1.set_yticks(np.arange(len(WordList)))
        ax1.set_yticklabels(WordList)
        ax1.set_xticks(TickPositions)
        ax1.set_xticklabels(TickLabels, rotation='vertical')
        ax1.grid(True, 'major', color='w', linestyle='-', linewidth=0.7)
        ax1.grid(True, 'minor', color='0.92', linestyle='-', linewidth=0.35)

        #TweetVolumeDistributionOverHotWords
        ax2.grid(True, 'major', color='w', linestyle='-', linewidth=0.7)
        ax2.grid(True, 'minor', color='0.92', linestyle='-', linewidth=0.35)
        ax2.set_axis_bgcolor('0.95')
        ax2.invert_xaxis()
        ax2.barh(np.arange(len(WordList)), TweetCountArray, align='center')
        # Add the count next to each bar; a run of equal counts is labelled
        # only once (the array is sorted, so equal values are adjacent).
        PreviousValue = None
        for p, ch in enumerate(TweetCountArray):
            if ch != PreviousValue:
                ax2.annotate(str(ch), xy=(ch + 2.5, p - 0.25), va='center')
                PreviousValue = ch
        ax2.set_yticks(np.arange(len(WordList)))
        ax2.set_yticklabels(WordList)
        ax2.set_ylim(0, len(WordList) - 1 + 0.25)

        #Summary panel with assisting information
        #1. Date : Day, Date Year and TIME_START to TIME_END
        #2. TIME_START
        #3. TIME_END
        #4. TIME_WINDOW
        #5. No. of HotWords per TimeWindow
        #6. Total No. of unique HotWords Found
        #7. Max #of Tweets for HotWord
        #8. Min #of Tweets for HotWord
        #9. Max #of Tweets in a timeWindow
        #10.Min #of Tweets in a timeWindow
        rowLabels = ['1. Date', '2. Start time', '3. End time', '4. Time Window (seconds)',
                     '5. No.Of HotWords per TimeWindow', '6. No. of unique hotwords',
                     '7. Max #of tweets for HotWord', '8. Min #of tweets for HotWord',
                     '9. Max #of tweets in a time window', '10. Min #of tweets in a time window']
        DateStart = gmt_to_local(TIME_START, make_string=True, format='%a %d %b %Y')
        DateEnd = gmt_to_local(TIME_END, make_string=True, format='%a %d %b %Y')
        Date = DateStart if DateStart == DateEnd else DateStart + ' to ' + DateEnd
        start_time = gmt_to_local(TIME_START, make_string=True, format='%d %b %H:%M')
        end_time = gmt_to_local(TIME_END, make_string=True, format='%d %b %H:%M')
        cellText = [Date, start_time, end_time, scale, HotWordSize, len(set(WordList)),
                    TweetCountArray.max(), TweetCountArray.min(),
                    str(max(volume)), str(min(volume))]
        # Text is placed bottom-up, so reverse to read 1..10 from the top.
        rowLabels.reverse()
        cellText.reverse()
        for y, (label, text) in enumerate(zip(rowLabels, cellText)):
            ax3.text(0.05, (float(y) / 20) + 0.05, s='%s : %s' % (label, text), size=20)
        ax3.xaxis.set_visible(False)
        ax3.yaxis.set_visible(False)

        # The previous pattern '%s_to_%spng' lacked the dot before the extension.
        fig1.savefig('%s_to_%s.png' % (start_time, end_time), dpi=200, bbox_inches="tight")
    finally:
        # Release the (very large) figure even when plotting or saving fails.
        plt.close(fig1)