def processMedia(objectId, tweet):
    '''Formats the media of a tweet so that it is compatible with crowded.

    Args:
        objectId: Identifier for the record; stored stringified under
            'objectId' in the output.
        tweet: Tweet mapping. Its 'entities' field must contain 'media';
            'hashtags' and 'text' are optional as far as this function
            is concerned.

    Returns:
        A dict of the form {'data': [mediaTweet]}, where mediaTweet carries
        the three media URLs, the datetime, the caption, the tags and the
        object id. Returns None when the 'entities' field cannot be read.

    Note:
        A missing 'media' key inside entities is not handled here and
        propagates to the caller.
    '''

    try:
        entities = tweet['entities']
    except Exception:
        # Lazy logging args: formatting cannot fail on odd tweet types.
        logging.error("Failed to get entities field from tweet. Tweet: \n%s",
                      tweet, exc_info=True)
        return None

    # Get the media
    thumbUrl, lowUrl, stdUrl = getMedia(entities['media'])

    # Get the tags. Default to no tags so that tweets without a 'hashtags'
    # entity do not fail later with an unbound local name.
    hashtags = []
    if 'hashtags' in entities:
        hashtags = getTags(entities['hashtags'])

    # Get the tweet text. The raw text is captured first so the handler
    # never has to index the tweet again (which could itself raise).
    rawText = None
    try:
        rawText = tweet['text']
        caption = decodeEncode(rawText)
    except Exception:
        logging.debug("Failed to parse caption: %s", rawText, exc_info=True)
        caption = '***caption not parsed***'

    # Get the datetime out as ISO
    dt = getDatetime(tweet)

    # Kludge them together
    mediaTweet = {
        'standard_resolution': stdUrl,
        'low_resolution': lowUrl,
        'thumbnail': thumbUrl,
        'dt': dt,
        'source': 'twitter',
        'caption': caption,
        'tags': hashtags,
        'objectId': str(objectId),
    }

    tweetOut = {'data': [mediaTweet]}

    return tweetOut
def processMedia(objectId, tweet):
    """Formats the media of a tweet so that it is compatible with crowded.

    Args:
        objectId: Identifier for the record; stored stringified under
            "objectId" in the output.
        tweet: Tweet mapping. Its "entities" field must contain "media";
            "hashtags" and "text" are optional as far as this function
            is concerned.

    Returns:
        A dict of the form {"data": [mediaTweet]}, where mediaTweet carries
        the three media URLs, the datetime, the caption, the tags and the
        object id. Returns None when the "entities" field cannot be read.

    Note:
        A missing "media" key inside entities is not handled here and
        propagates to the caller.
    """

    try:
        entities = tweet["entities"]
    except Exception:
        # Lazy logging args: formatting cannot fail on odd tweet types.
        logging.error(
            "Failed to get entities field from tweet. Tweet: \n%s",
            tweet,
            exc_info=True,
        )
        return None

    # Get the media
    thumbUrl, lowUrl, stdUrl = getMedia(entities["media"])

    # Get the tags. Default to no tags so that tweets without a "hashtags"
    # entity do not fail later with an unbound local name.
    hashtags = []
    if "hashtags" in entities:
        hashtags = getTags(entities["hashtags"])

    # Get the tweet text. The raw text is captured first so the handler
    # never has to index the tweet again (which could itself raise).
    rawText = None
    try:
        rawText = tweet["text"]
        caption = decodeEncode(rawText)
    except Exception:
        logging.debug("Failed to parse caption: %s", rawText, exc_info=True)
        caption = "***caption not parsed***"

    # Get the datetime out as ISO
    dt = getDatetime(tweet)

    # Kludge them together
    mediaTweet = {
        "standard_resolution": stdUrl,
        "low_resolution": lowUrl,
        "thumbnail": thumbUrl,
        "dt": dt,
        "source": "twitter",
        "caption": caption,
        "tags": hashtags,
        "objectId": str(objectId),
    }

    tweetOut = {"data": [mediaTweet]}

    return tweetOut
handleErrors(p, e) i = 1 for line in f: # Tidy up before dealing with the words try: line = line.rstrip('\n').rstrip('\r') line = line.split(',') word, pho = line[0], line[1] except Exception, e: handleErrors(p, e) word = decodeEncode(word.lower()) pho = decodeEncode(pho.lower()) # Pass to mongo inserter res = mongoInserter(p, collection, word, pho) # Counter i += 1 if p.verbose == True and i % 1000 == 0: print i f.close() mdb.close(c, dbh) #----------------------------------------------------------------------------------------
print e handleErrors(p, e) i = 1 for line in f: # Tidy up before dealing with the words try: line = line.rstrip("\n").rstrip("\r") line = line.split(",") slang, word = line[0], line[1] except Exception, e: handleErrors(p, e) word = decodeEncode(word.lower()) slang = decodeEncode(slang.lower()) # Pass to mongo inserter res = mongoUpdate(p, collection, word, slang) # Counter i += 1 if p.verbose == True and i % 100 == 0: print i f.close() mdb.close(c, dbh) # ----------------------------------------------------------------------------------------
# Tidy up before dealing with the words try: line = line.rstrip('\n').rstrip('\r') line = line.lstrip(',').rstrip(',') line = line.split(',') except Exception, e: handleErrors(p, e) # Split up the line and take the first 2 columns try: word, token = line[0], line[1] except Exception, e: handleErrors(p, "\nLine:\n%s\n\n" %line) # Handle the decoding/encoding for mongo word = decodeEncode(word.lower()) token = decodeEncode(token) # Drop the lead and end spaces and make it a list token = token.strip() # Get the space-removed emoticons tokens = getDespaced(token) tokens += getDespaced(token.lower()) # Pass to mongo inserter for token in tokens: res = mongoUpdate(p, emoCollection, word, token) # Counter i += 1