def searchWordpress(raw_keyword):
    keyword = urllib2.quote(raw_keyword)
    opsecHeader.writeLastCheckedTime('wordpress')

    ############### WORDPRESS ##################
    #
    # See http://en.search.wordpress.com/?q=obama&s=date&f=json
    #
    # Arguments:
    # q = keyword to search for
    # s = sort order; we want date, not relevance
    # f = format; we want JSON

    wordpressQueryString = 'http://en.search.wordpress.com/?q=' + keyword + '&s=date&f=json'

    opsecHeader.queryWebsiteJSON("wordpress", wordpressQueryString)

    wordpressLatestEpoch = getLatestWordpress()
    wordpressResults = opsecHeader.readResultsJSON('wordpress')
    epochTime = wordpressResults[0]['epoch_time']

    if str(wordpressLatestEpoch) == str(epochTime):
        print "No new blog posts since last query."
    else:
        for x in wordpressResults:
            epochTime = x['epoch_time']
            if int(wordpressLatestEpoch) < int(epochTime):
                title = (x['title']).encode('utf-8')
                author = (x['author']).encode('utf-8')
                content = (x['content']).encode('utf-8')
                link = (x['link']).encode('utf-8')
                writeLatestWordpress(epochTime, title, author, content, link, keyword)
                opsecHeader.sendEmail(keyword, "Wordpress")
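
# A minimal sketch of the opsecHeader query/read helpers that every function in
# this listing relies on. The cache-to-disk layout below is an assumption made
# for illustration; only the call signatures -- queryWebsiteJSON(name, url[, api_key])
# and readResultsJSON(name) -- are taken from the surrounding code.
import json
import urllib2


def queryWebsiteJSON(name, url, api_key=None):
    # Fetch the URL and cache the raw JSON response as <name>.json.
    request = urllib2.Request(url)
    if api_key:
        # Hypothetical handling of the optional third argument (used by the
        # reddit caller); how opsecHeader actually uses it is not shown here.
        request.add_header('User-Agent', api_key)
    response = urllib2.urlopen(request).read()
    with open(name + '.json', 'w') as f:
        f.write(response)


def readResultsJSON(name):
    # Load the cached response written by queryWebsiteJSON.
    with open(name + '.json', 'r') as f:
        return json.load(f)
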
def searchTwitter(raw_keyword):
    keyword = urllib2.quote(raw_keyword)
    opsecHeader.writeLastCheckedTime('twitter')

    # See https://dev.twitter.com/docs/api/1/get/search
    tweetSinceDate = str(getLatestTweet(None, keyword)[0])
    searchQueryString = 'http://search.twitter.com/search.json?q=' + keyword + '&rpp=10&result_type=recent'

    if tweetSinceDate != '0': # Twitter does not play nice with invalid since_id's
        searchQueryString += '&since_id=' + tweetSinceDate

    opsecHeader.queryWebsiteJSON("twitter", searchQueryString)

    twitterResults = opsecHeader.readResultsJSON('twitter')
    twitterAllResults = twitterResults['results']

    if not twitterAllResults:
        print "No results."
    else:
        existingEpochTime = getLatestTweet(None, keyword)[1]

        for x in twitterAllResults:
            created_at = (x['created_at']).encode('utf-8')
            epochTimeFound = calendar.timegm((time.strptime(created_at, '%a, %d %b %Y %H:%M:%S +0000')))
            if int(epochTimeFound) > int(existingEpochTime):
                twitterID = (x['id'])
                from_user = (x['from_user']).encode('utf-8')
                text = (x['text']).encode('utf-8')
                created_at = (x['created_at']).encode('utf-8')
                profile_image_url_https = (x['profile_image_url_https']).encode('utf-8')
                location, lat, lng = genGeo(from_user)

                writeTweet(twitterID, from_user, text, created_at, keyword, location, lat, lng, epochTimeFound, profile_image_url_https)
                opsecHeader.sendEmail(keyword, "Twitter")
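
# Worked example of the timestamp handling above: the v1 search API returns
# created_at strings such as 'Mon, 10 Sep 2012 18:45:01 +0000', which strptime
# plus calendar.timegm turn into the UTC epoch used for the newer-than check.
import calendar
import time

sample_created_at = 'Mon, 10 Sep 2012 18:45:01 +0000'
print calendar.timegm(time.strptime(sample_created_at, '%a, %d %b %Y %H:%M:%S +0000'))
# 1347302701
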
def getPastes():
    global pasteIDsfound, pasteMaxSize

    if (len(pasteIDsfound) >= (pasteMaxSize * 2)):
        print "[-] cleaning list"
        for i in range(0, len(pasteIDsfound) - (pasteMaxSize)):
            pasteIDsfound.pop(0)
    print "[-] Pulling archive list..."
    try:
        page = urllib2.urlopen("http://www.pastebin.com/archive.php").read()
        regex = re.compile(
            '<td><img src="/i/t.gif" .*?<a href="/(.*?)">(.*?)</a></td>.*?<td>(.*?)</td>',
            re.S)
        pastes = regex.findall(page)
        for p in pastes:
            pasteID = p[0]
            pasteTitle = p[1]
            fetchAttempt = 0
            opsecHeader.writeLastCheckedTime('pastebin')
            if (pasteID not in pasteIDsfound):
                print "[-] New paste(", pasteID, ")"
                pasteIDsfound.append(pasteID)
                print len(pasteIDsfound)
                pastePage = ''
                while (pastePage == ''):
                    print "[+] Pulling Raw paste"
                    sock = urllib2.urlopen("http://pastebin.com/raw.php?i=" +
                                           pasteID)
                    pastePage = sock.read()
                    content_type = sock.headers.get('Content-type', '')
                    encoding = content_type.split('charset=')[1] if 'charset=' in content_type else 'utf-8'  # e.g. iso-8859-1
                    try:
                        pastePage = pastePage.decode(encoding).encode('utf-8')
                        if (pastePage == ''):
                            pastePage = 'empty paste from http://pastebin.com/raw.php?i=' + pasteID
                        if "requesting a little bit too much" in pastePage:
                            pastePage = ''
                            print "[-] hitting pastebin too quickly, sleeping for 2 seconds and trying again.."
                            time.sleep(2)
                    except (LookupError, UnicodeError):
                        print "[!] couldn't decode page to utf-8"
                    print "[-] Sleeping for 1 second"
                    time.sleep(1)
                    fetchAttempt = fetchAttempt + 1
                    if (fetchAttempt > 1):
                        print "[+] Couldnt fetch " + "http://pastebin.com/raw.php?i=" + pasteID + " after 2 tries"
                        pastePage = '  '
                addPaste(pasteTitle, pasteID, pastePage)
            else:
                print "[-] Already seen ", pasteID
        sleeptime = random.randint(15, 45)
        print "[-] sleeping for", sleeptime, "seconds.."
        time.sleep(sleeptime)
        return 1
    except IOError:
        print "[!] Error fetching list of pastes, sleeping for 10 seconds and trying again"
        time.sleep(10)
        return 0
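
# A small self-contained check of the archive regex used above. The HTML row is
# a mock of the markup the pattern expects (the real pastebin archive page may
# differ); the point is only to show which groups it captures: ID, title, age.
import re

archive_regex = re.compile(
    '<td><img src="/i/t.gif" .*?<a href="/(.*?)">(.*?)</a></td>.*?<td>(.*?)</td>',
    re.S)
sample_row = ('<td><img src="/i/t.gif" class="i_p0" alt="" />'
              '<a href="/Ab12Cd34">Example paste</a></td>'
              '<td>2 mins ago</td>')
print archive_regex.findall(sample_row)
# [('Ab12Cd34', 'Example paste', '2 mins ago')]
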
def getPost(account_id, site, user_id, content_type):
    latest_epoch_time = getLatestPost(user_id, site, content_type)
    queryString = 'http://api.stackexchange.com/2.1/users/' + str(user_id) + '/' + str(content_type) + 's?fromdate=' + str(latest_epoch_time) + '&order=desc&sort=creation&site=' + site + '&key=' + opsecHeader.stackexchange_api_key
    opsecHeader.queryWebsiteJSON(str(site) + str(user_id) + str(content_type), queryString)
    opsecHeader.writeLastCheckedTime('stackexchange')

    results = opsecHeader.readResultsJSON(str(site) + str(user_id) + str(content_type))
    items = results['items']
    for x in items:

        creation_date = x['creation_date']
        if(latest_epoch_time != creation_date):

            if(content_type == 'question'):
                question_id = x['question_id']
                url = x['link']
                html = urllib2.urlopen(url).read()
                soup = BeautifulSoup(html)
                dirty_content = soup.find('div', {'class': 'post-text', 'itemprop': 'description'})
                content = ''.join(dirty_content.findAll(text=True))

            elif(content_type == 'answer'):
                answer_id = x['answer_id']
                url = "http://" + str(site) + ".com/a/" + str(answer_id)
                html = urllib2.urlopen(url).read()
                soup = BeautifulSoup(html)
                answer_id = 'answer-' + str(answer_id)
                div_content = soup.find('div', {'id': answer_id})
                dirty_content = div_content.find('div', {'class': 'post-text'})
                content = ''.join(dirty_content.findAll(text=True))

            elif(content_type == 'comment'):
                comment_id = x['comment_id']
                post_id = x['post_id']
                short_url = 'http://' + str(site) + '.com/q/' + str(post_id)
                long_url = str(urllib2.urlopen(short_url).geturl())
                long_url = long_url.split("#")[0]
                url = long_url + '#comment' + str(comment_id) + '_' + str(post_id)
                html = urllib2.urlopen(url).read()
                soup = BeautifulSoup(html)
                comment_id_format = 'comment-' + str(comment_id)
                try: #Will fail if comments need to be loaded via AJAX
                    comment_tr = soup.find('tr', {'id': comment_id_format})
                    dirty_content = comment_tr.find('span', {'class': 'comment-copy'})
                    content = ''.join(dirty_content.findAll(text=True))
                except AttributeError:
                    content = 'See website'

            profile_image = x['owner']['profile_image']
            display_name = x['owner']['display_name']

            writeDisplayName(account_id, display_name)
            writeLatestPost(account_id, user_id, site, content_type, creation_date, profile_image, url, content, display_name)

            keywords = opsecHeader.getUserKeywords(account_id, 'stackexchange')
            for keyword in keywords:
                if keyword in content:
                    opsecHeader.sendEmail(keyword, "Stack Exchange", display_name)
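
# Hypothetical driver for getPost(): poll every content type the branches above
# understand for one tracked Stack Exchange account. The account name, site and
# user id below are placeholders, not real data.
def pollStackExchangeUser(account_id, site, user_id):
    for content_type in ('question', 'answer', 'comment'):
        getPost(account_id, site, user_id, content_type)

# pollStackExchangeUser('example_account', 'stackoverflow', 12345)
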
def getUserComments(user):
    #http://www.reddit.com/dev/api

    user = urllib2.quote(user)

    redditQueryString = 'http://www.reddit.com/user/' + user + '/overview.json'
    opsecHeader.queryWebsiteJSON("reddit", redditQueryString,
                                 opsecHeader.reddit_api_key)
    opsecHeader.writeLastCheckedTime('reddit')

    redditResults = opsecHeader.readResultsJSON('reddit')
    try:
        redditAllResults = redditResults['data']['children']
    except KeyError:
        redditAllResults = None
    epoch_time_existing = getLatestUserEpoch(user)

    if not redditAllResults:
        print "No results."
    else:
        for x in redditAllResults:
            epoch_time_found = str(int(x['data']['created_utc']))
            if int(epoch_time_found) > int(epoch_time_existing):
                try:
                    link_id = (x['data']['link_id']).encode('utf-8')[3:]
                except KeyError:
                    link_id = ''
                comment_id = (x['data']['id']).encode('utf-8')
                author = (x['data']['author']).encode('utf-8')
                try:
                    body = (x['data']['body']).encode('utf-8')
                except KeyError:
                    body = ''
                try:
                    link_title = (x['data']['link_title']).encode('utf-8')
                except KeyError:
                    link_title = ''
                subreddit = (x['data']['subreddit']).encode('utf-8')
                permalink = 'http://www.reddit.com/r/' + subreddit + '/comments/' + link_id + '/' + urllib2.quote(
                    link_title) + '/' + comment_id
                writeLatestPost(author, body, link_id, comment_id, link_title,
                                subreddit, epoch_time_found, permalink)

                keywords = opsecHeader.getUserKeywords(author, 'reddit')
                for keyword in keywords:
                    if keyword in body:
                        opsecHeader.sendEmail(keyword, "Reddit", author)
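
# Worked example of the permalink assembly above, with placeholder values:
# reddit comment permalinks are built from the subreddit, the link_id (with its
# 't3_' prefix stripped), the URL-quoted thread title and the comment id.
import urllib2

subreddit = 'netsec'
link_id = 'abc123'                     # x['data']['link_id'] minus the 't3_' prefix
link_title = 'example thread title'
comment_id = 'c0ffee'
permalink = ('http://www.reddit.com/r/' + subreddit + '/comments/' + link_id +
             '/' + urllib2.quote(link_title) + '/' + comment_id)
print permalink
# http://www.reddit.com/r/netsec/comments/abc123/example%20thread%20title/c0ffee
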
def getUserTweets(user):
    screen_name = urllib2.quote(user)
    opsecHeader.writeLastCheckedTime('twitter')

    # See https://dev.twitter.com/docs/api/1/get/statuses/user_timeline
    tweetSinceDate = str(getLatestTweet(screen_name, None)[0])
    epochTimeExisting = getLatestTweet(screen_name, None)[1]

    twitterQueryString = 'https://api.twitter.com/1/statuses/user_timeline.json?screen_name=' + screen_name + '&count=10'

    if tweetSinceDate != '0':  # Twitter does not play nice with invalid since_id's
        twitterQueryString += '&since_id=' + tweetSinceDate

    opsecHeader.queryWebsiteJSON("twitterUserTweets", twitterQueryString)

    twitterResults = opsecHeader.readResultsJSON('twitterUserTweets')
    twitterAllResults = twitterResults

    if not twitterAllResults:
        print "No results."
    else:
        for x in twitterAllResults:
            created_at = (x['created_at']).encode('utf-8')
            epochTimeFound = calendar.timegm(
                (email.utils.parsedate(created_at)))
            if int(epochTimeFound) > int(epochTimeExisting):
                twitterID = (x['id'])
                text = (x['text']).encode('utf-8')
                from_user = (x['user']['screen_name']).encode('utf-8')
                created_at = (x['created_at']).encode('utf-8')
                profile_image_url_https = (
                    x['user']['profile_image_url_https']).encode('utf-8')
                location, lat, lng = genGeo(from_user)

                writeTweet(twitterID, from_user, text, created_at, '',
                           location, lat, lng, epochTimeFound,
                           profile_image_url_https)
                keywords = opsecHeader.getUserKeywords(from_user, 'twitter')
                for keyword in keywords:
                    if keyword in text:
                        opsecHeader.sendEmail(keyword, "Twitter", from_user)
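
# Worked example of the timestamp handling above. Unlike the search API's
# 'Mon, 10 Sep 2012 18:45:01 +0000' form, user_timeline returns
# 'Mon Sep 10 18:45:01 +0000 2012', so the lenient email.utils.parsedate is
# used instead of a fixed strptime format; both end up at the same UTC epoch.
import calendar
import email.utils

sample_created_at = 'Mon Sep 10 18:45:01 +0000 2012'
print calendar.timegm(email.utils.parsedate(sample_created_at))
# 1347302701
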
def searchFacebook(raw_keyword):
    opsecHeader.writeLastCheckedTime('facebook')
    keyword = urllib2.quote(raw_keyword)
    # See https://developers.facebook.com/docs/reference/api/
    #
    # Arguments:
    # q = keyword we are searching for
    # type = kind of object we are searching for, e.g. post
    #
    # Returns:
    # name; id (facebook.com/id for their profile)

    facebookLatestEpoch = getLatestPostTime()
    facebookQueryString = 'https://graph.facebook.com/search?q=' + keyword + '&type=post'
    opsecHeader.queryWebsiteJSON("facebook", facebookQueryString)

    print "Parsing Facebook data..."

    facebookResults = opsecHeader.readResultsJSON('facebook')
    facebookAllResults = facebookResults['data']

    if facebookAllResults:
        for x in facebookAllResults:
            if 'message' in x:
                message = x['message'].encode('utf-8')
                name = (x['from']['name']).encode('utf-8')
                user_id = (x['from']['id']).encode('utf-8')
                updated_time = (x['updated_time']).encode('utf-8')
                epoch_time = calendar.timegm(
                    (time.strptime(updated_time, '%Y-%m-%dT%H:%M:%S+0000')))

                if int(epoch_time) > int(facebookLatestEpoch):
                    profilePicture = getProfilePicture(user_id)
                    writeLatestPost(name, user_id, message, profilePicture,
                                    updated_time, keyword, epoch_time)
                    opsecHeader.sendEmail(keyword, "Facebook")
                    print "Updated Time: " + updated_time
                else:
                    print "Post too old."