Esempio n. 1
0
def processMedia(objectId, tweet):
    """Format a tweet's media so that it is compatible with crowded.

    Args:
        objectId: Identifier stored under 'objectId' as ``str(objectId)``.
        tweet: Mapping for a single tweet. 'entities' is required and is
            expected to contain 'media'; entities['hashtags'] and 'text'
            are optional. The whole tweet is also passed to getDatetime().

    Returns:
        ``{'data': [mediaTweet]}`` where mediaTweet carries the three media
        URLs, the datetime, the source, the caption, the tags and the
        objectId. None is returned when 'entities' cannot be read.

    Raises:
        Whatever ``entities['media']`` raises when the media entry is
        missing (KeyError for a plain dict); that lookup is not guarded.
    """

    try:
        entities = tweet['entities']
    except Exception:
        logging.error("Failed to get entities field from tweet. Tweet: \n%s",
                      tweet,
                      exc_info=True)
        return None

    # Get the media (thumbnail, low and standard resolution URLs)
    thumbUrl, lowUrl, stdUrl = getMedia(entities['media'])

    # Get the tags. Default to an empty list so that a tweet without
    # hashtags no longer hits an unbound local when the output is built.
    hashtags = []
    if 'hashtags' in entities:
        hashtags = getTags(entities['hashtags'])

    # Get the tweet text. The raw text is held in a local so the handler
    # never has to index the tweet again (which would re-raise on a
    # missing 'text' key instead of falling back to the placeholder).
    rawText = None
    try:
        rawText = tweet['text']
        caption = decodeEncode(rawText)
    except Exception:
        logging.debug("Failed to parse caption: %s", rawText, exc_info=True)
        caption = '***caption not parsed***'

    # Get the datetime out as ISO
    dt = getDatetime(tweet)

    # Kludge them together
    mediaTweet = {
        'standard_resolution': stdUrl,
        'low_resolution': lowUrl,
        'thumbnail': thumbUrl,
        'dt': dt,
        'source': 'twitter',
        'caption': caption,
        'tags': hashtags,
        'objectId': str(objectId)
    }

    return {'data': [mediaTweet]}
def processMedia(objectId, tweet):
    """Handle the formatting of the tweet media to be compatible with crowded.

    Args:
        objectId: Identifier; stored in the output as ``str(objectId)``.
        tweet: Mapping describing one tweet. It must provide "entities"
            (which in turn is expected to provide "media"). The "hashtags"
            entity and the "text" field are optional. getDatetime() receives
            the tweet unchanged.

    Returns:
        A dict of the form ``{"data": [mediaTweet]}``, or None if the
        "entities" field cannot be read from the tweet.

    Raises:
        The lookup ``entities["media"]`` is unguarded, so a tweet without
        media propagates that error (KeyError for a plain dict).
    """

    try:
        entities = tweet["entities"]
    except Exception:
        logging.error("Failed to get entities field from tweet. Tweet: \n%s", tweet, exc_info=True)
        return None

    # Get the media
    thumbUrl, lowUrl, stdUrl = getMedia(entities["media"])

    # Get the tags; tweets without a "hashtags" entity get an empty list
    # rather than leaving the name unbound for the dict literal below.
    hashtags = getTags(entities["hashtags"]) if "hashtags" in entities else []

    # Get the tweet text. Keep the raw value in a local so that logging a
    # failure cannot itself fail on a missing "text" key.
    rawText = None
    try:
        rawText = tweet["text"]
        caption = decodeEncode(rawText)
    except Exception:
        logging.debug("Failed to parse caption: %s", rawText, exc_info=True)
        caption = "***caption not parsed***"

    # Get the datetime out as ISO
    dt = getDatetime(tweet)

    # Kludge them together
    mediaTweet = {
        "standard_resolution": stdUrl,
        "low_resolution": lowUrl,
        "thumbnail": thumbUrl,
        "dt": dt,
        "source": "twitter",
        "caption": caption,
        "tags": hashtags,
        "objectId": str(objectId),
    }

    tweetOut = {"data": [mediaTweet]}

    return tweetOut
        handleErrors(p, e)
    
    i = 1
    
    for line in f:
        
        # Tidy up before dealing with the words
        try:
            line = line.rstrip('\n').rstrip('\r')
            line = line.split(',')
            word, pho = line[0], line[1]
            
        except Exception, e:
            handleErrors(p, e) 
                   
        word = decodeEncode(word.lower())
        pho  = decodeEncode(pho.lower())
        
        # Pass to mongo inserter
        res = mongoInserter(p, collection, word, pho)

        # Counter
        i += 1
        if p.verbose == True and i % 1000 == 0:
            print i

    f.close()
    mdb.close(c, dbh)
        
        
#----------------------------------------------------------------------------------------
Esempio n. 4
0
        print e
        handleErrors(p, e)

    i = 1

    for line in f:

        # Tidy up before dealing with the words
        try:
            line = line.rstrip("\n").rstrip("\r")
            line = line.split(",")
            slang, word = line[0], line[1]
        except Exception, e:
            handleErrors(p, e)

        word = decodeEncode(word.lower())
        slang = decodeEncode(slang.lower())

        # Pass to mongo inserter
        res = mongoUpdate(p, collection, word, slang)

        # Counter
        i += 1
        if p.verbose == True and i % 100 == 0:
            print i

    f.close()
    mdb.close(c, dbh)


# ----------------------------------------------------------------------------------------
Esempio n. 5
0
        # Tidy up before dealing with the words
        try:
            line = line.rstrip('\n').rstrip('\r')
            line = line.lstrip(',').rstrip(',')
            line = line.split(',')
        except Exception, e:
            handleErrors(p, e) 
        
        # Split up the line and take the first 2 columns           
        try:
            word, token = line[0], line[1] 
        except Exception, e:
            handleErrors(p, "\nLine:\n%s\n\n" %line) 
                
        # Handle the decoding/encoding for mongo           
        word   = decodeEncode(word.lower())
        token  = decodeEncode(token)
        
        # Drop the lead and end spaces and make it a list
        token = token.strip()
        
        # Get the space-removed emoticons
        tokens = getDespaced(token)
        tokens += getDespaced(token.lower())
        
        # Pass to mongo inserter
        for token in tokens:
            res = mongoUpdate(p, emoCollection, word, token)

        # Counter
        i += 1