def on_data(self, data):
    """Process one raw stream message and store it when a city is known.

    The JSON payload is decoded, its entities (mentions, media, hashtags,
    URLs) are summarised by the module-level helpers, a point location is
    derived from the exact coordinates or from the place bounding box, the
    city is resolved, a sentiment prediction is attached, and the record is
    saved to ``db`` when a city was found.

    Args:
        data: Raw JSON text of a single streamed message.

    Returns:
        True in every case, including when processing fails: a failure is
        reported on stdout and the message is skipped (best effort).
    """
    try:
        item = json.loads(data)
        entities = item["entities"]

        # Summarise each kind of entity; None means "not present".
        mentionedPerson = None
        if entities["user_mentions"]:
            mentionedPerson = getMentioned(entities["user_mentions"])

        uploadedMedia = None
        if "media" in entities:
            uploadedMedia = getMedia(entities["media"])

        topic = None
        if entities["hashtags"]:
            topic = getTopic(entities["hashtags"])

        mentionedUrl = None
        if entities["urls"]:
            mentionedUrl = getUrl(entities["urls"])

        textEntities = {
            "mentioned": mentionedPerson,
            "media": uploadedMedia,
            "topic": topic,
            "mentionedUrl": mentionedUrl,
        }

        # "place" may be null in the payload; fall back to an empty mapping
        # so a message that only carries coordinates can still be located.
        place = item.get("place") or {}
        coordinates = item.get("coordinates")
        boundingBox = place.get("bounding_box")

        # Prefer the exact coordinates, otherwise use the place bounding box.
        geoInfo = None
        if coordinates:
            geoInfo = {
                "type": coordinates["type"],
                "locations": coordinates["coordinates"],
            }
        elif boundingBox:
            geoInfo = {
                "type": boundingBox["type"],
                "locations": boundingBox["coordinates"],
            }

        # Collapse a Polygon into a Point at the centroid of the first four
        # vertices of its first ring.
        if geoInfo and geoInfo['type'] == 'Polygon':
            ring = geoInfo['locations'][0]
            centre = Polygon(
                [[ring[i][0], ring[i][1]] for i in range(4)]).centroid
            geoInfo['type'] = u'Point'
            geoInfo['locations'] = [centre.x, centre.y]

        country = place.get("country")

        # Resolve the city from the place name first ...
        city = None
        if "full_name" in place:
            candidate = place["full_name"].split(',')[0]
            if candidate in cityList:
                city = candidate
        # ... and fall back to a lookup by coordinates when the name is
        # missing or is not one of the tracked cities.
        if city is None and geoInfo:
            city = findCities(
                Point(geoInfo['locations'][0], geoInfo['locations'][1]))

        storeData = {
            "_id": item["id_str"],
            "text": item["text"],
            "textInfo": textEntities,
            "userName": item["user"]["screen_name"],
            "userImage": item["user"]["profile_image_url"],
            "TweetTime": item["created_at"],
            "geoInfo": geoInfo,
            "country": country,
            "city": city,
        }

        # Attach the sentiment prediction.
        storeData['prediction'] = Smodel_tweet_getter.learn(
            storeData, model[0], model[1], model[2])

        # Store only located messages; one screen name is excluded
        # (NOTE(review): the reason for the exclusion is not visible here).
        if city and storeData['userName'] != 'will_i_ammg':
            db.save(storeData)
            # geoInfo is None when the city came from the place name alone.
            geoType = geoInfo['type'] if geoInfo else None
            print('Storing tweet_id %s - %s - %s - %s' %
                  (item["id_str"], city, storeData['prediction'], geoType))
        else:
            print('Fileted tweet_id %s - not store - no cities' %
                  item["id_str"])
        return True
    except Exception as e:
        # Best effort: report and skip the message. Exception (rather than
        # BaseException) lets KeyboardInterrupt / SystemExit propagate.
        print('failed ondata, %s' % str(e))
        return True
def on_data(self, data):
    """Process one raw stream message and store it when a city is known.

    The JSON payload is decoded, its entities (mentions, media, hashtags,
    URLs) are summarised by the module-level helpers, a point location is
    derived from the exact coordinates or from the place bounding box, the
    city is resolved, a sentiment prediction is attached, and the record is
    saved to ``db`` when a city was found.

    Args:
        data: Raw JSON text of a single streamed message.

    Returns:
        True in every case, including when processing fails: a failure is
        reported on stdout and the message is skipped (best effort).
    """
    try:
        item = json.loads(data)
        entities = item["entities"]

        # Summarise each kind of entity; None means "not present".
        mentionedPerson = None
        if entities["user_mentions"]:
            mentionedPerson = getMentioned(entities["user_mentions"])

        uploadedMedia = None
        if "media" in entities:
            uploadedMedia = getMedia(entities["media"])

        topic = None
        if entities["hashtags"]:
            topic = getTopic(entities["hashtags"])

        mentionedUrl = None
        if entities["urls"]:
            mentionedUrl = getUrl(entities["urls"])

        textEntities = {
            "mentioned": mentionedPerson,
            "media": uploadedMedia,
            "topic": topic,
            "mentionedUrl": mentionedUrl,
        }

        # "place" may be null in the payload; fall back to an empty mapping
        # so a message that only carries coordinates can still be located.
        place = item.get("place") or {}
        coordinates = item.get("coordinates")
        boundingBox = place.get("bounding_box")

        # Prefer the exact coordinates, otherwise use the place bounding box.
        geoInfo = None
        if coordinates:
            geoInfo = {
                "type": coordinates["type"],
                "locations": coordinates["coordinates"],
            }
        elif boundingBox:
            geoInfo = {
                "type": boundingBox["type"],
                "locations": boundingBox["coordinates"],
            }

        # Collapse a Polygon into a Point at the centroid of the first four
        # vertices of its first ring.
        if geoInfo and geoInfo['type'] == 'Polygon':
            ring = geoInfo['locations'][0]
            centre = Polygon(
                [[ring[i][0], ring[i][1]] for i in range(4)]).centroid
            geoInfo['type'] = u'Point'
            geoInfo['locations'] = [centre.x, centre.y]

        country = place.get("country")

        # Resolve the city from the place name first ...
        city = None
        if "full_name" in place:
            candidate = place["full_name"].split(',')[0]
            if candidate in cityList:
                city = candidate
        # ... and fall back to a lookup by coordinates when the name is
        # missing or is not one of the tracked cities.
        if city is None and geoInfo:
            city = findCities(
                Point(geoInfo['locations'][0], geoInfo['locations'][1]))

        storeData = {
            "_id": item["id_str"],
            "text": item["text"],
            "textInfo": textEntities,
            "userName": item["user"]["screen_name"],
            "userImage": item["user"]["profile_image_url"],
            "TweetTime": item["created_at"],
            "geoInfo": geoInfo,
            "country": country,
            "city": city,
        }

        # Attach the sentiment prediction.
        storeData['prediction'] = Smodel_tweet_getter.learn(
            storeData, model[0], model[1], model[2])

        # Store only located messages; one screen name is excluded
        # (NOTE(review): the reason for the exclusion is not visible here).
        if city and storeData['userName'] != 'will_i_ammg':
            db.save(storeData)
            # geoInfo is None when the city came from the place name alone.
            geoType = geoInfo['type'] if geoInfo else None
            print('Storing tweet_id %s - %s - %s - %s' %
                  (item["id_str"], city, storeData['prediction'], geoType))
        else:
            print('Fileted tweet_id %s - not store - no cities' %
                  item["id_str"])
        return True
    except Exception as e:
        # Best effort: report and skip the message. Exception (rather than
        # BaseException) lets KeyboardInterrupt / SystemExit propagate.
        print('failed ondata, %s' % str(e))
        return True
# NOTE(review): this physical line holds several statements collapsed together:
#   1. a stray `return True` / `except BaseException, e: ...` tail that repeats
#      the end of on_data and has no matching `try` on this line;
#   2. on_error(self, status), which prints the status value it receives;
#   3. the `__main__` entry script: it creates the listener and OAuth handler,
#      copies the keys of `cities` into `cityList`, builds the sentiment model,
#      opens the 'cities' database on the CouchDB server at 127.0.0.1:5984 and
#      starts the filtered stream (locations=Australia, languages=["en"]).
# Everything after the `#define couchdb ...` marker is, as written, part of a
# comment, so those statements only run once the line breaks are restored.
return True except BaseException, e: print 'failed ondata,', str(e) def on_error(self, status): print('status: %s' % status) if __name__ == '__main__': listener = StdOutListener() auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET) auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET) for key in cities: cityList.append(key) print 'The Fecting Tweets on Cities:', cityList model = Smodel_tweet_getter.buildmodel() #define couchdb you can change the ip and db name here couch = couchdb.Server('http://127.0.0.1:5984/') db = couch['cities'] stream = Stream(auth, listener) stream.filter(locations=Australia, languages=["en"])
def on_error(self, status):
    """Report an error status delivered by the stream.

    Args:
        status: Status value passed in by the stream; it is only printed.
    """
    print('status: %s' % status)


if __name__ == '__main__':
    listener = StdOutListener()
    auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

    # Every key of `cities` becomes a tracked city name.
    cityList.extend(cities)
    # Single pre-formatted argument: prints the same text on Python 2 and 3.
    print('The Fecting Tweets on Cities: %s' % (cityList,))

    # Sentiment model; on_data reads it through the module-level name `model`.
    model = Smodel_tweet_getter.buildmodel()

    # CouchDB connection: change the server address and database name here.
    couch = couchdb.Server('http://127.0.0.1:5984/')
    db = couch['cities']

    stream = Stream(auth, listener)
    stream.filter(locations=Australia, languages=["en"])