Example no. 1
def getPost(account_id, site, user_id, content_type):
    # Pull the user's newest questions/answers/comments from the Stack
    # Exchange API, starting from the newest post already on record.
    latest_epoch_time = getLatestPost(user_id, site, content_type)
    queryString = ('http://api.stackexchange.com/2.1/users/' + str(user_id) +
                   '/' + str(content_type) + 's?fromdate=' + str(latest_epoch_time) +
                   '&order=desc&sort=creation&site=' + site +
                   '&key=' + opsecHeader.stackexchange_api_key)
    opsecHeader.queryWebsiteJSON(str(site) + str(user_id) + str(content_type), queryString)
    opsecHeader.writeLastCheckedTime('stackexchange')

    results = opsecHeader.readResultsJSON(str(site) + str(user_id) + str(content_type))
    items = results['items']
    for x in items:
        creation_date = x['creation_date']
        if latest_epoch_time != creation_date:  # skip the post we already recorded

            if content_type == 'question':
                # Scrape the question page and strip the HTML body to plain text.
                question_id = x['question_id']
                url = x['link']
                html = urllib2.urlopen(url).read()
                soup = BeautifulSoup(html)
                dirty_content = soup.find('div', {'class': 'post-text', 'itemprop': 'description'})
                content = ''.join(dirty_content.findAll(text=True))

            elif content_type == 'answer':
                # The API gives no direct answer link; build the /a/<id>
                # short URL, then pull the text out of the answer's div.
                answer_id = x['answer_id']
                url = "http://" + str(site) + ".com/a/" + str(answer_id)
                html = urllib2.urlopen(url).read()
                soup = BeautifulSoup(html)
                answer_id = 'answer-' + str(answer_id)
                div_content = soup.find('div', {'id': answer_id})
                dirty_content = div_content.find('div', {'class': 'post-text'})
                content = ''.join(dirty_content.findAll(text=True))

            elif content_type == 'comment':
                # Resolve the short question URL to its canonical form, then
                # anchor to the specific comment on that page.
                comment_id = x['comment_id']
                post_id = x['post_id']
                short_url = 'http://' + str(site) + '.com/q/' + str(post_id)
                long_url = str(urllib2.urlopen(short_url).geturl())
                long_url = long_url.split("#")[0]
                url = long_url + '#comment' + str(comment_id) + '_' + str(post_id)
                html = urllib2.urlopen(url).read()
                soup = BeautifulSoup(html)
                comment_id_format = 'comment-' + str(comment_id)
                try:  # will fail if the comments are loaded via AJAX
                    comment_tr = soup.find('tr', {'id': comment_id_format})
                    dirty_content = comment_tr.find('span', {'class': 'comment-copy'})
                    content = ''.join(dirty_content.findAll(text=True))
                except AttributeError:
                    content = 'See website'

            profile_image = x['owner']['profile_image']
            display_name = x['owner']['display_name']

            writeDisplayName(account_id, display_name)
            writeLatestPost(account_id, user_id, site, content_type, creation_date, profile_image, url, content, display_name)

            keywords = opsecHeader.getUserKeywords(account_id, 'stackexchange')
            for keyword in keywords:
                if keyword in content:
                    opsecHeader.sendEmail(keyword, "Stack Exchange", display_name)
Example no. 2
def addPaste(title, id, paste):  # note: 'id' shadows the built-in id()
    # Store a matching paste and alert on every monitored keyword it contains.
    keywords = opsecHeader.getUserKeywords('all', 'pastebin')
    for keyword in keywords:
        if keyword in paste:
            now = int(time.mktime(time.localtime()))
            # Parameterized INSERT; the driver handles quoting/escaping.
            sql = "INSERT INTO `pastebin` (`epoch_time`, `title`, `paste`, `pasteID`, `keyword`) VALUES (%s, %s, %s, %s, %s)"
            try:
                if opsecHeader.cur.execute(sql, (now, title, paste, id, keyword)):
                    opsecHeader.db.commit()
                    print "[+] Added."
            except Exception:
                print "[!] DB Problem (id:%s) NOT inserted" % (id)
                print sys.exc_info()[0]
                return False
            opsecHeader.sendEmail(keyword, "Pastebin")
Example no. 3
def getUserComments(user):
    # Reddit API docs: http://www.reddit.com/dev/api
    user = urllib2.quote(user)

    redditQueryString = 'http://www.reddit.com/user/' + user + '/overview.json'
    opsecHeader.queryWebsiteJSON("reddit", redditQueryString, opsecHeader.reddit_api_key)
    opsecHeader.writeLastCheckedTime('reddit')

    redditResults = opsecHeader.readResultsJSON('reddit')
    try:
        redditAllResults = redditResults['data']['children']
    except KeyError:
        redditAllResults = None
    epoch_time_existing = getLatestUserEpoch(user)

    if not redditAllResults:
        print "No results."
    else:
        for x in redditAllResults:
            # created_utc is a float; its string form ends in '.0', which the
            # [:-2] slice strips to leave integer seconds.
            epoch_time_found = str(x['data']['created_utc']).encode('utf-8')[:-2]
            if int(epoch_time_found) > int(epoch_time_existing):
                try:
                    link_id = (x['data']['link_id']).encode('utf-8')[3:]
                except KeyError:
                    link_id = ''
                comment_id = (x['data']['id']).encode('utf-8')
                author = (x['data']['author']).encode('utf-8')
                try:
                    body = (x['data']['body']).encode('utf-8')
                except KeyError:
                    body = ''
                try:
                    link_title = (x['data']['link_title']).encode('utf-8')
                except KeyError:
                    link_title = ''
                subreddit = (x['data']['subreddit']).encode('utf-8')
                permalink = 'http://www.reddit.com/r/' + subreddit + '/comments/' + link_id + '/' + urllib2.quote(link_title) + '/' + comment_id
                writeLatestPost(author, body, link_id, comment_id, link_title, subreddit, epoch_time_found, permalink)

                keywords = opsecHeader.getUserKeywords(author, 'reddit')
                for keyword in keywords:
                    if keyword in body:
                        opsecHeader.sendEmail(keyword, "Reddit", author)
Example no. 4
def getUserTweets(user):
    screen_name = urllib2.quote(user)
    opsecHeader.writeLastCheckedTime('twitter')

    # See https://dev.twitter.com/docs/api/1/get/statuses/user_timeline
    # getLatestTweet returns (newest stored tweet id, its epoch time) for the user.
    latestTweet = getLatestTweet(screen_name, None)
    tweetSinceDate = str(latestTweet[0])
    epochTimeExisting = latestTweet[1]

    twitterQueryString = 'https://api.twitter.com/1/statuses/user_timeline.json?screen_name=' + screen_name + '&count=10'

    if tweetSinceDate != '0':  # Twitter does not play nice with invalid since_id's
        twitterQueryString += '&since_id=' + tweetSinceDate

    opsecHeader.queryWebsiteJSON("twitterUserTweets", twitterQueryString)

    twitterAllResults = opsecHeader.readResultsJSON('twitterUserTweets')  # None on failure

    if not twitterAllResults:
        print "No results."
    else:
        for x in twitterAllResults:
            # Convert Twitter's created_at string to UTC epoch seconds.
            created_at = (x['created_at']).encode('utf-8')
            epochTimeFound = calendar.timegm(email.utils.parsedate(created_at))
            if int(epochTimeFound) > int(epochTimeExisting):
                twitterID = (x['id'])
                text = (x['text']).encode('utf-8')
                from_user = (x['user']['screen_name']).encode('utf-8')
                created_at = (x['created_at']).encode('utf-8')
                profile_image_url_https = (
                    x['user']['profile_image_url_https']).encode('utf-8')
                location, lat, lng = genGeo(from_user)

                writeTweet(twitterID, from_user, text, created_at, '',
                           location, lat, lng, epochTimeFound,
                           profile_image_url_https)
                keywords = opsecHeader.getUserKeywords(from_user, 'twitter')
                for keyword in keywords:
                    if keyword in text:
                        opsecHeader.sendEmail(keyword, "Twitter", from_user)
Example no. 5
# (Excerpt begins mid-try; the code that sets `author` precedes this snippet.)
    reddit.getUserComments(author)
except IndexError:
    print("No Reddit user found at index " + str(oneDigitMinute))

# StackExchange
try:
    account_id = stackexchange.getUsers()[oneDigitMinute]
    stackexchange.getUserPosts(account_id)
except IndexError:
    print("No StackExchange user found at index " + str(oneDigitMinute))

print("-------- General search --------")
if (minute % 5) == 0:
    print("Attempting general site search...")
    try:
        keyword = opsecHeader.getUserKeywords('all', 'twitter')[fiveMinInterval]
        twitter.searchTwitter(keyword)
    except IndexError:
        print("No twitter keyword at index " + str(fiveMinInterval))

    try:
        keyword = opsecHeader.getUserKeywords('all', 'facebook')[fiveMinInterval]
        facebook.searchFacebook(keyword)
    except IndexError:
        print("No facebook keyword at index " + str(fiveMinInterval))

    try:
        keyword = opsecHeader.getUserKeywords('all', 'wordpress')[fiveMinInterval]
        wordpress.searchWordpress(keyword)
    except IndexError:
        print("No wordpress keyword at index " + str(fiveMinInterval))