Example #1
def search_twitter(raw_keyword):
    keyword = urllib2.quote(raw_keyword)
    opsecHeader.write_last_checked_time('twitter')

    # See https://dev.twitter.com/docs/api/1/get/search
    # get_latest_tweet returns (since_id, epoch_time) for the newest stored match
    tweet_since_date, existing_epoch_time = get_latest_tweet(None, keyword)
    tweet_since_date = str(tweet_since_date)
    search_query_string = 'http://search.twitter.com/search.json?q=' + keyword + '&rpp=10&result_type=recent'

    if tweet_since_date != '0':  # Twitter does not play nice with an invalid since_id
        search_query_string += '&since_id=' + tweet_since_date

    opsecHeader.query_website_json("twitter", search_query_string)

    twitter_results = opsecHeader.read_results_json('twitter')
    twitter_all_results = twitter_results['results']

    if not twitter_all_results:
        print "No results."
    else:
        for i in twitter_all_results:
            created_at = (i['created_at']).encode('utf-8')
            epoch_time_found = calendar.timegm((time.strptime(created_at, '%a, %d %b %Y %H:%M:%S +0000')))
            if int(epoch_time_found) > int(existing_epoch_time):
                twitter_id = (i['id'])
                from_user = (i['from_user']).encode('utf-8')
                text = (i['text']).encode('utf-8')
                profile_image_url_https = (i['profile_image_url_https']).encode('utf-8')
                location, lat, lng = gen_geo(from_user)

                write_tweet(twitter_id, from_user, text, created_at, keyword, location, lat, lng, epoch_time_found, profile_image_url_https)
                opsecHeader.send_email(keyword, "Twitter")
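
The loop relies on a get_latest_tweet helper that is not shown in this example; judging by how its return value is used, it yields a (since_id, epoch_time) tuple for the newest stored tweet. A hypothetical stub, useful only for tracing the control flow above:

def get_latest_tweet(screen_name, keyword):
    # Hypothetical stand-in for the real helper, which looks up the newest
    # stored tweet for this screen name or keyword. (0, 0) means nothing is
    # stored yet, which is why callers skip since_id when they see '0'.
    return (0, 0)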
Example #2
    def search_wordpress(self, raw_keyword):
        keyword = urllib2.quote(raw_keyword)
        opsecHeader.write_last_checked_time('wordpress')

        ############### WORDPRESS ##################
        #
        # See http://en.search.wordpress.com/?q=obama&s=date&f=json
        #
        # Arguments:
        # q = keyword to search for
        # s = sort order; we want date, not relevance
        # f = format; we want JSON

        wordpress_query_string = 'http://en.search.wordpress.com/?q=' + keyword + '&s=date&f=json'

        opsecHeader.query_website_json("wordpress", wordpress_query_string)

        wordpress_latest_epoch = self.get_latest_wordpress()
        wordpress_results = opsecHeader.read_results_json('wordpress')
        epoch_time = wordpress_results[0]['epoch_time']  # newest post first; assumes at least one result came back

        if str(wordpress_latest_epoch) == str(epoch_time):
            print "No new blog posts since last query."
        else:
            for i in wordpress_results:
                epoch_time = i['epoch_time']
                if int(wordpress_latest_epoch) < int(epoch_time):
                    title = (i['title']).encode('utf-8')
                    author = (i['author']).encode('utf-8')
                    content = (i['content']).encode('utf-8')
                    link = (i['link']).encode('utf-8')
                    self.write_latest_wordpress(epoch_time, title, author, content, link, keyword)
                    opsecHeader.send_email(keyword, "Wordpress")
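
The comment block documents the three query arguments; as a sanity check, the same feed can be fetched and decoded with nothing but the standard library (a minimal sketch, assuming the endpoint still serves this JSON shape):

import json
import urllib2

def fetch_wordpress_posts(keyword):
    # q = keyword, s = sort by date, f = JSON -- the same arguments
    # documented above.
    url = ('http://en.search.wordpress.com/?q=%s&s=date&f=json'
           % urllib2.quote(keyword))
    return json.loads(urllib2.urlopen(url).read())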
Example #3
    def get_post(self, account_id, site, user_id, content_type):
        latest_epoch_time = self.get_latest_post(user_id, site, content_type)
        query_string = 'http://api.stackexchange.com/2.1/users/' + str(user_id) + '/' + str(content_type) + 's?fromdate=' + str(latest_epoch_time) + '&order=desc&sort=creation&site=' + site + '&key=' + self.api_key
        opsecHeader.query_website_json(str(site) + str(user_id) + str(content_type), query_string)
        opsecHeader.write_last_checked_time('stackexchange')

        results = opsecHeader.read_results_json(str(site) + str(user_id) + str(content_type))
        items = results['items']
        for i in items:
            creation_date = i['creation_date']
            if latest_epoch_time != creation_date:

                if content_type == 'question':
                    url = i['link']
                    html = urllib2.urlopen(url).read()
                    soup = BeautifulSoup(html)
                    dirty_content = soup.find('div', {'class': 'post-text', 'itemprop': 'description'})
                    content = ''.join(dirty_content.findAll(text=True))

                elif content_type == 'answer':
                    answer_id = i['answer_id']
                    url = "http://" + str(site) + ".com/a/" + str(answer_id)
                    html = urllib2.urlopen(url).read()
                    soup = BeautifulSoup(html)
                    answer_id = 'answer-' + str(answer_id)
                    div_content = soup.find('div', {'id': answer_id})
                    dirty_content = div_content.find('div', {'class': 'post-text'})
                    content = ''.join(dirty_content.findAll(text=True))

                elif content_type == 'comment':
                    comment_id = i['comment_id']
                    post_id = i['post_id']
                    short_url = 'http://' + str(site) + '.com/q/' + str(post_id)
                    long_url = str(urllib2.urlopen(short_url).geturl())
                    long_url = long_url.split("#")[0]
                    url = long_url + '#comment' + str(comment_id) + '_' + str(post_id)
                    html = urllib2.urlopen(url).read()
                    soup = BeautifulSoup(html)
                    comment_id_format = 'comment-' + str(comment_id)
                    try:  # Will fail if comments need to be loaded via AJAX
                        comment_tr = soup.find('tr', {'id': comment_id_format})
                        dirty_content = comment_tr.find('span', {'class': 'comment-copy'})
                        content = ''.join(dirty_content.findAll(text=True))
                    except AttributeError:
                        content = 'See website'

                profile_image = i['owner']['profile_image']
                display_name = i['owner']['display_name']

                self.write_display_name(account_id, display_name)
                self.write_latest_post(account_id, user_id, site, content_type, creation_date, profile_image, url, content, display_name)

                keywords = opsecHeader.get_user_keywords(account_id, 'stackexchange')
                for keyword in keywords:
                    if keyword in content:
                        opsecHeader.send_email(keyword, "Stack Exchange", display_name)
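
The query URL in get_post is assembled by string concatenation; an equivalent, easier-to-audit construction with urllib.urlencode (same endpoint and parameters, nothing new assumed):

import urllib

def build_post_query(user_id, content_type, fromdate, site, api_key):
    # Mirrors the /2.1/users/{id}/{type}s URL built in get_post above.
    params = urllib.urlencode({
        'fromdate': fromdate,
        'order': 'desc',
        'sort': 'creation',
        'site': site,
        'key': api_key,
    })
    return 'http://api.stackexchange.com/2.1/users/%s/%ss?%s' % (
        user_id, content_type, params)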
Example #4
    def get_user_comments(self, user):
        # See http://www.reddit.com/dev/api

        user = urllib2.quote(user)

        reddit_query_string = 'http://www.reddit.com/user/' + user + '/overview.json'
        opsecHeader.query_website_json("reddit", reddit_query_string, opsecHeader.reddit_api_key)
        opsecHeader.write_last_checked_time('reddit')

        reddit_results = opsecHeader.read_results_json('reddit')
        try:
            reddit_all_results = reddit_results['data']['children']
        except KeyError:
            reddit_all_results = None
        epoch_time_existing = self.get_latest_user_epoch(user)

        if not reddit_all_results:
            print "No results."
        else:
            for i in reddit_all_results:
                epoch_time_found = str((i['data']['created_utc'])).encode('utf-8')[:-2]  # created_utc is a float; strip the trailing '.0'
                if int(epoch_time_found) > int(epoch_time_existing):
                    try:
                        link_id = (i['data']['link_id']).encode('utf-8')[3:]
                    except KeyError:
                        link_id = ''
                    comment_id = (i['data']['id']).encode('utf-8')
                    author = (i['data']['author']).encode('utf-8')
                    try:
                        body = (i['data']['body']).encode('utf-8')
                    except KeyError:
                        body = ''
                    try:
                        link_title = (i['data']['link_title']).encode('utf-8')
                    except KeyError:
                        link_title = ''
                    subreddit = (i['data']['subreddit']).encode('utf-8')
                    permalink = 'http://www.reddit.com/r/' + subreddit + '/comments/' + link_id + '/' + urllib2.quote(link_title) + '/' + comment_id
                    self.write_latest_post(author, body, link_id, comment_id, link_title, subreddit, epoch_time_found, permalink)

                    keywords = opsecHeader.get_user_keywords(author, 'reddit')
                    for keyword in keywords:
                        if keyword in body:
                            opsecHeader.send_email(keyword, "Reddit", author)
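
The permalink stitched together above follows reddit's /r/{subreddit}/comments/{link_id}/{title}/{comment_id} scheme; the same construction as a small helper (a sketch, using only fields the loop already reads):

import urllib2

def build_permalink(subreddit, link_id, link_title, comment_id):
    # Same URL scheme get_user_comments concatenates inline.
    return 'http://www.reddit.com/r/%s/comments/%s/%s/%s' % (
        subreddit, link_id, urllib2.quote(link_title), comment_id)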
Example #5
    def gen_geo(self, from_user):
        geo_query_string = 'https://api.twitter.com/1.1/users/show.json?screen_name=' + from_user
        opsecHeader.query_website_oauth_json("twitterGeo", geo_query_string, self.consumer_key, self.consumer_secret, self.access_token, self.access_token_secret)
        results = opsecHeader.read_results_json('twitterGeo')
        location = (results['location']).encode('utf-8')
        if not location:
            return 'null', '0.0000000', '0.0000000'
        else:
            google_query_string = 'http://maps.googleapis.com/maps/api/geocode/json?&address=' + urllib2.quote(location) + '&sensor=false'
            opsecHeader.query_website_json("googleGeoCode", google_query_string)
            google_results = opsecHeader.read_results_json('googleGeoCode')
            google_all_results = google_results['results']
            if not google_all_results:
                return location, '0.0000000', '0.0000000'
            else:
                for i in google_all_results:
                    lat = (i['geometry']['location']['lat'])
                    lng = (i['geometry']['location']['lng'])
                    return location, lat, lng  # return on the first geocoding match
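
Since the final loop returns on its first iteration, only results[0] matters. A minimal standalone sketch of the same lookup (keyless Maps endpoint as queried above; lat/lng come back as numbers):

import json
import urllib2

def geocode(location):
    # Query the same Maps Geocoding endpoint gen_geo uses and return the
    # first match, or zeroed coordinates when Google has no result.
    url = ('http://maps.googleapis.com/maps/api/geocode/json?address=%s&sensor=false'
           % urllib2.quote(location))
    results = json.loads(urllib2.urlopen(url).read())['results']
    if not results:
        return '0.0000000', '0.0000000'
    loc = results[0]['geometry']['location']
    return loc['lat'], loc['lng']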
Example #6
def get_user_tweets(user):
    screen_name = urllib2.quote(user)
    opsecHeader.write_last_checked_time('twitter')

    # See https://dev.twitter.com/docs/api/1/get/statuses/user_timeline
    tweet_since_date, epoch_time_existing = get_latest_tweet(screen_name, None)
    tweet_since_date = str(tweet_since_date)

    twitter_query_string = 'https://api.twitter.com/1/statuses/user_timeline.json?screen_name=' + screen_name + '&count=10'

    if tweet_since_date != '0':  # Twitter does not play nice with an invalid since_id
        twitter_query_string += '&since_id=' + tweet_since_date

    opsecHeader.query_website_json("twitterUserTweets", twitter_query_string)

    twitter_all_results = opsecHeader.read_results_json('twitterUserTweets')  # user_timeline returns the list of tweets directly

    if not twitter_all_results:
        print "No results."
    else:
        for i in twitter_all_results:
            created_at = (i['created_at']).encode('utf-8')
            epoch_time_found = calendar.timegm((email.utils.parsedate(created_at)))
            if int(epoch_time_found) > int(epoch_time_existing):
                twitter_id = (i['id'])
                text = (i['text']).encode('utf-8')
                from_user = (i['user']['screen_name']).encode('utf-8')
                profile_image_url_https = (i['user']['profile_image_url_https']).encode('utf-8')
                location, lat, lng = gen_geo(from_user)

                write_tweet(twitter_id, from_user, text, created_at, '', location, lat, lng, epoch_time_found, profile_image_url_https)
                keywords = opsecHeader.get_user_keywords(from_user, 'twitter')
                for keyword in keywords:
                    if keyword in text:
                        opsecHeader.send_email(keyword, "Twitter", from_user)
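
Note that this example parses created_at with email.utils.parsedate, where Example #1 used an explicit time.strptime format; parsedate copes with the user_timeline date style and reduces to the same UTC epoch (illustrative timestamp):

import calendar
import email.utils

created_at = 'Wed Aug 08 21:30:00 +0000 2012'  # illustrative user_timeline value
epoch_time = calendar.timegm(email.utils.parsedate(created_at))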
Example #7
    def search_facebook(self, raw_keyword):
        opsecHeader.write_last_checked_time('facebook')
        keyword = urllib2.quote(raw_keyword)
        # See https://developers.facebook.com/docs/reference/api/
        #
        # Arguments:
        # q = keyword we are searching for
        # type = kind of object we are searching for, e.g. post
        #
        # Returns:
        # name; id (facebook.com/id for their profile)

        facebook_latest_epoch = self.get_latest_post_time()
        facebook_query_string = 'https://graph.facebook.com/search?q=' + keyword + '&type=post'
        opsecHeader.query_website_json("facebook", facebook_query_string)

        print "Parsing Facebook data..."

        facebook_results = opsecHeader.read_results_json('facebook')
        facebook_all_results = facebook_results['data']

        if facebook_all_results:
            for i in facebook_all_results:
                if 'message' in i:
                    message = i['message'].encode('utf-8')
                    name = (i['from']['name']).encode('utf-8')
                    user_id = (i['from']['id']).encode('utf-8')
                    updated_time = (i['updated_time']).encode('utf-8')
                    epoch_time = calendar.timegm((time.strptime(updated_time, '%Y-%m-%dT%H:%M:%S+0000')))

                    if int(epoch_time) > int(facebook_latest_epoch):
                        profile_picture = self.get_profile_picture(user_id)
                        self.write_latest_post(name, user_id, message, profile_picture, updated_time, keyword, epoch_time)
                        opsecHeader.send_email(keyword, "Facebook")
                        print "Updated Time: " + updated_time
                    else:
                        print "Post too old."
Example #8
    def get_user_accounts(self, stackexchange_account):
        print("Getting StackExchange user accounts...")
        associated_query_string = 'http://api.stackexchange.com/2.1/users/' + str(stackexchange_account) + '/associated?key=' + self.api_key
        opsecHeader.query_website_json("StackExchangeUserAccounts", associated_query_string)

        results = opsecHeader.read_results_json('StackExchangeUserAccounts')
        items = results['items']

        # Default both user IDs to 1 (placeholders for non-existent accounts)
        stackoverflow_user_id = 1
        serverfault_user_id = 1

        account_id = None
        for i in items:
            site_name = i['site_name']
            user_id = i['user_id']
            print site_name
            print user_id
            if site_name == "Stack Overflow":
                stackoverflow_user_id = user_id
            if site_name == "Server Fault":
                serverfault_user_id = user_id
            account_id = i['account_id']
            print i
        if account_id is not None:  # items may be empty; avoid a NameError
            self.add_accounts(account_id, stackoverflow_user_id, serverfault_user_id)
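
The loop keeps only Stack Overflow and Server Fault; a hedged generalization that maps every associated site name to its per-site user ID (same fields as above, nothing new assumed):

def site_user_ids(items):
    # items is results['items'] from the /users/{id}/associated endpoint;
    # each entry carries site_name, user_id, and the network-wide account_id.
    return dict((i['site_name'], i['user_id']) for i in items)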