Code example #1
    def get_service_channel(self, lookup_by_page_ids=True):
        candidates = self.get_attached_service_channels()
        if not candidates and lookup_by_page_ids:
            # Fallback to lookup by token/page ids
            if self.facebook_access_token:
                candidates = FacebookServiceChannel.objects.find(
                    account=self.account,
                    facebook_access_token=self.facebook_access_token)[:]
            if not candidates:
                candidates = FacebookServiceChannel.objects.find(
                    account=self.account,
                    facebook_page_ids__in=self.facebook_page_ids)[:]
            if not candidates:
                return None

            if len(candidates) == 1:
                return candidates[0]
            else:
                LOGGER.error(
                    "We have multiple candidates for service channel matching for enterprise channel %s" %
                    self)
                return None

        if len(candidates) > 1:
            LOGGER.warn("We have multiple candidates for service channel matching "
                        "for enterprise channel %s" % self)
        if candidates:
            return candidates[0]
Code example #2
File: facebook_extra.py  Project: princez1214/flask
def __get_channel_events(channel, user):

    tracked_pages = list(channel.all_facebook_pages)
    if not tracked_pages:
        try:
            tracked_pages = facebook_driver.GraphAPI(
                channel.get_access_token(user),
                channel=channel).get_object('/me/accounts')['data']
        except facebook.GraphAPIError as e:
            app.logger.error(e)

    events = []
    for page in tracked_pages:
        token = page['access_token']
        api = facebook_driver.GraphAPI(token)
        try:
            res = api.get_object(page['id'] + '/events')
            for itm in res['data']:
                itm['page_id'] = page['id']
                itm['access_token'] = token
                itm['type'] = 'event'
            events.extend(res['data'])
        except facebook.GraphAPIError as e:
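            # Graph API error code 32: the page has hit Facebook's request rate limit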
            if e.result['error']['code'] == 32:
                from solariat_bottle.settings import LOGGER
                LOGGER.warn("Page has hit rate limit: id=%s name=%s",
                            page.get('id'), page.get('name'))
                continue
            raise  # bare raise preserves the original traceback
Code example #3
    def preprocess_post(self, event_json):
        if isinstance(event_json, (tuple, list)):
            message_type, data = event_json
            post_data = None
            preprocess = self.preprocessors.get(message_type)
            if preprocess is None:
                LOGGER.warn(u"Unknown message type: %s\nEvent is: %s" %
                            (message_type, event_json))
                return None

            try:
                post_data = preprocess(data)
            except Exception:
                LOGGER.exception(u"Error parsing tweet: %s" % unicode(event_json))

            if post_data:
                return post_data
            else:
                LOGGER.info(u"Twitter event: %s" % unicode(event_json))

        elif isinstance(event_json, dict):
            # already processed
            return event_json
        return None
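
A brief usage sketch of the dispatch above (the instance name and the 'status' message type are hypothetical; `preprocessors` maps stream message types to parser callables):

    # A (message_type, data) tuple is dispatched through self.preprocessors;
    # a dict is assumed to be already processed and is returned unchanged.
    bot.preprocess_post(('status', raw_status_json))    # parsed via preprocessors['status']
    bot.preprocess_post({'content': 'already parsed'})  # returned as-is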
Code example #4
def twitter_status_to_post_dict(data):
    base_url = 'https://twitter.com/%s/statuses/%s'

    if 'text' in data or 'full_text' in data:
        extended_tweet = data.get('extended_tweet') or {}
        is_retweet = 'retweeted_status' in data
        post_fields = {'twitter': {'_wrapped_data': json.dumps(data)}}
        post_fields['twitter'].update(
            _source=TweetSource.TWITTER_PUBLIC_STREAM, _is_retweet=is_retweet)

        if 'user' in data:
            author = data['user']
            content = data.get('full_text') or extended_tweet.get(
                'full_text') or data.get('text')
        else:
            LOGGER.warn(u"Mis-formatted twitter data %s" % data)
            return

        def _get_tweet_lang(data):
            if 'lang' in data and data['lang']:
                return data['lang']
            elif 'lang' in author:
                return author['lang']

        post_fields['twitter'].update(
            _is_manual_retweet=not is_retweet and content.startswith('RT'))
        post_fields['lang'] = _get_tweet_lang(data)
        post_fields['user_profile'] = parse_user_profile(author)
        post_fields['content'] = content
        post_fields['url'] = base_url % (author['screen_name'], data['id_str'])

        # Twitter data used to link the post to a possible thread
        post_fields['twitter'].update(TweetParser()(data))
        return post_fields
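
A usage sketch with a minimal public-stream status (field values are hypothetical; `parse_user_profile` and `TweetParser` come from the surrounding project):

    status = {
        'id_str': '123456',
        'text': 'hello world',
        'lang': 'en',
        'user': {'screen_name': 'alice', 'lang': 'en'},
    }
    fields = twitter_status_to_post_dict(status)
    # fields['content'] == 'hello world'
    # fields['url'] == 'https://twitter.com/alice/statuses/123456'
    # fields['twitter']['_is_retweet'] is False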
Code example #5
def filter_ids(keys):
    " check that all keys type of int "
    _keys = []
    for key in keys:
        try:
            _keys.append(str(int(key)))
        except (ValueError, TypeError):
            LOGGER.warn('incorrect id: %s', key)
    return _keys
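
For example (a hypothetical call):

    filter_ids(['123', 'abc', '45'])  # -> ['123', '45']; 'abc' is logged as an incorrect id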
Code example #6
File: base.py  Project: princez1214/flask
 def _construct_query(self, ignored_params=None, *args, **kwargs):
     """ Build the appropriate filter query for the model from the request parameters"""
     result = {}
     if ignored_params is None:
         # By default we want to skip reserved fields which are used for different purposes
         ignored_params = self.reserved_fields
     _fields = self.model.fields.keys()
     # Now just iterate parameters and construct the required query
     for filter_name, filter_value in kwargs.items():
         if filter_name in ignored_params:
             continue
         parts = filter_name.split('__')
         if parts[0] not in _fields:
             LOGGER.warn("%s supposed to be filter but not in fields for %s, got %s" % (filter_name,
                                                                                        self.model,
                                                                                        str(kwargs)))
             continue
         result[str(filter_name)] = filter_value
     return result
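
A hypothetical call, assuming the model defines `title` and `status` fields and `token` is one of the reserved fields:

    view._construct_query(title='News', status__ne='archived', token='abc')
    # -> {'title': 'News', 'status__ne': 'archived'}
    # 'token' is skipped as reserved; filters on unknown fields are logged and dropped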
Code example #7
def subscribe_to_app(server, callback_url):
    while not server.started:
        LOGGER.warn("Server not started, going to sleep")
        time.sleep(5)
    LOGGER.info("Subscribing to app")
    # Now subscribe to our app on facebook
    G = facebook_driver.GraphAPI(version='2.2')
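    # A Facebook app access token is the literal string "<app_id>|<app_secret>"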
    app_access_token = FACEBOOK_APP_ID + "|" + FACEBOOK_APP_SECRET
    path = FACEBOOK_APP_ID + "/subscriptions"
    post_args = {
        'access_token': app_access_token,
        'callback_url': callback_url,
        'fields': 'feed',
        'object': 'page',
        'verify_token': 'token'
    }
    subs = G.request(G.version + "/" + path, post_args=post_args)
    if subs:
        print "Subscription response was: " + str(subs)
Code example #8
File: purging.py  Project: princez1214/flask
def mark_and_sweep_topics(channel_or_tag, time_slot, rank=None):
    '''
    Given a time slot, remove all root topics for the channel or tag that
    are not in the top list for that slot.
    '''

    # Reset everything with a counter. The counter value is randomized so the
    # removal step has to match it exactly, which prevents accidental deletes.
    counter = random.randrange(MARKED_TO_KEEP + 1, BIGGEST_COUNTER_VALUE)
    update = mark_to_remove(channel_or_tag, time_slot, counter)
    initial_count = update['n']
    #logger.debug("Reset %s items" % initial_count)

    # Now recursively mark items to keep
    marked = mark_items_to_keep(channel_or_tag, time_slot, rank=rank)
    #logger.debug("Marked %d items to keep" % marked)

    # Now remove what is left
    update = remove_records(counter)
    removed = update['n']
    #logger.debug("Removed %d items" % removed)

    stats = initial_count, marked, removed
    if abs(initial_count - (marked + removed)) > VERIFICATION_DELTA:
        msg_info = {
            "channel": channel_or_tag.title,
            "initial_count": initial_count,
            "marked": marked,
            "removed": removed,
            "real_sum": marked + removed
        }
        subject = """[!]checksum for topics FAILED during purging. Channel: %s""" % channel_or_tag.title
        body = MSG_TEMPLATE % msg_info
        send_notification_for_team(subject=subject, body=body)
        LOGGER.warn(
            "invalid checksum for topics:: channel: %s; %d initially, %d marked, and %d removed",
            channel_or_tag.title, *stats)

    return stats
Code example #9
    def prepare(self, data, target_id, channel, driver):

        # Guard against being called without a channel; a bare [channel]
        # would be truthy even when channel is None.
        matched_channels = [channel] if channel else []

        if not matched_channels:
            LOGGER.warn(
                'Matched channels for %s EnterpriseFacebookChannel not found',
                getattr(channel, 'id', None))

        outbound_ids = []
        if channel.facebook_handle_id:
            outbound_ids.append(str(channel.facebook_handle_id))
        if channel.facebook_page_ids:
            outbound_ids.extend(channel.facebook_page_ids)

        post_fields = {}
        post_fields.update(self.handle(data, target_id))
        sender_id = post_fields.pop('sender_id')
        self.update_with_channels(post_fields, sender_id, outbound_ids,
                                  matched_channels)
        self.update_with_user_profile(post_fields, sender_id, driver)

        return post_fields
Code example #10
    def fetch_and_post(self):
        ch_id = self.subscription.channel.id
        for posts_processed, entry in enumerate(
                self._query_posts(), start=self.posts_processed + 1):
            if self.subscriber.stopped():
                break

            post_fields = entry.solariat_post_data
            if not post_fields:
                LOGGER.warning('no post_fields in: %s', entry)
                self.flush_buffer()
                continue

            try:
                log_state(ch_id, get_post_natural_id(post_fields),
                          PostState.REMOVED_FROM_WORKER_QUEUE)
            except KeyError:
                LOGGER.error('cannot get post id: %s', post_fields)

            channels = self._find_channels(post_fields)
            if channels:
                post_fields['channels'] = channels
                self.post_queue.put(post_fields)  # blocked by queue maxsize
                self.posts_queued += 1
                self._fetch_buffer.append(entry)
            else:
                LOGGER.warn("No channels found for queued post %s\n"
                            "queue item id: %s" % (post_fields, entry.id))

            self.posts_processed = posts_processed
            if posts_processed % self.UPDATE_PROGRESS_EVERY == 0:
                self.update_progress()
                self.subscriber.aggregate_state(self,
                                                {'running': self.progress})
            self.flush_buffer()
            self.subscriber.update_status(SUBSCRIPTION_RUNNING)
Code example #11
File: purging.py  Project: princez1214/flask
def mark_and_sweep_trends(channel_or_tag, time_slot, topics):
    counter = random.randrange(MARKED_TO_KEEP + 1, BIGGEST_COUNTER_VALUE)
    marked_to_remove = trends_mark_to_remove(time_slot, channel_or_tag,
                                             counter)
    marked_to_keep = trends_mark_to_keep(time_slot, channel_or_tag, topics)
    remove_result = trends_remove(counter)
    initial_count, marked, removed = marked_to_remove['n'], marked_to_keep[
        'n'], remove_result['n']
    if abs(initial_count - (marked + removed)) > VERIFICATION_DELTA:
        msg_info = {
            "channel": channel_or_tag.title,
            "initial_count": initial_count,
            "marked": marked,
            "removed": removed,
            "real_sum": marked + removed
        }
        subject = """[!]checksum for trends FAILED during purging. Channel: %s""" % channel_or_tag.title
        body = MSG_TEMPLATE % msg_info
        send_notification_for_team(subject=subject, body=body)
        LOGGER.warn(
            "invalid checksum for trends:: channel: %s; %d initially, %d marked, and %d removed",
            channel_or_tag.title, initial_count, marked, removed)
    return initial_count, marked, removed
Code example #12
 def get_timerange_level(self):
     try:
         return guess_timeslot_level(parse_datetime(self.filters['from']),
                                     parse_datetime(self.filters['to']))
     except Exception:
         LOGGER.warn("Could not determine the timerange level from the "
                     "'from'/'to' filters")
Code example #13
    def post_received(self, post_field):
        """ Expose post_received functionality mainly for testing purposes. Could also use
         it for loading post data directly through bot in case of historics / load_data scripts """
        post_field = json.loads(post_field)
        print "Received " + str(post_field)
        if post_field['object'] == 'page':

            for entry in post_field['entry']:

                page_id = str(entry['id'])

                efc_channel = FacebookClient.get_facebook_channel(page_id)
                if not efc_channel:
                    LOGGER.warn(
                        'Active EnterpriseFacebookChannel not found for page_id %s',
                        page_id)
                    break
                check_channel_token_valid(efc_channel)

                matched_channels = FacebookClient.get_service_channels(page_id)
                if not matched_channels:
                    LOGGER.warn(
                        'Active FacebookServiceChannel not found for page_id %s',
                        page_id)
                    break

                G = facebook_driver.GraphAPI(efc_channel.facebook_access_token,
                                             channel=efc_channel)

                outbound_ids = []
                if efc_channel.facebook_handle_id:
                    outbound_ids.append(str(efc_channel.facebook_handle_id))
                outbound_ids.extend(
                    [str(u_id) for u_id in efc_channel.facebook_account_ids])

                attachments_ids = []
                for change in entry['changes']:
                    if change['value']['item'] in ('photo', ):
                        attachments_ids.append(change['value'].get(
                            'post_id', None))

                for change in entry['changes']:

                    post_fields = {}

                    if change['field'] == 'feed':

                        if change['value']['item'] == 'comment':
                            post_fields.update(
                                self.process_comment(
                                    G=G,
                                    page_id=page_id,
                                    change=change,
                                    attachments_ids=attachments_ids))
                        elif change['value']['item'] == 'status':
                            post_fields.update(
                                self.process_status_update(
                                    G=G,
                                    page_id=page_id,
                                    change=change,
                                    attachments_ids=attachments_ids))
                        elif change['value']['item'] in ('post', 'photo'):
                            post_fields.update(
                                self.process_post(
                                    G=G,
                                    page_id=page_id,
                                    change=change,
                                    attachments_ids=attachments_ids))
                        sender_id = post_fields.pop('sender_id', None)
                        if change['value']['item'] in ('comment', 'status',
                                                       'post', 'photo'):
                            service_chns = []
                            for candidate in matched_channels:
                                if (sender_id in outbound_ids or sender_id
                                        in candidate.facebook_page_ids
                                        or self.facebook_user_match(
                                            G, sender_id,
                                            efc_channel.facebook_handle_id)):
                                    # If the sender is the same as the authenticated user, it's an outbound post
                                    service_chns.append(
                                        str(candidate.outbound_channel.id))
                                else:
                                    # Otherwise we have an inbound post.
                                    service_chns.append(
                                        str(candidate.inbound_channel.id))

                            post_fields['channels'] = service_chns
                            user_profile = G.get_object(sender_id)

                            post_fields['user_profile'] = {
                                'platform_data': user_profile
                            }
                            if 'name' in user_profile:
                                post_fields['user_profile'][
                                    'user_name'] = user_profile['name']
                            if 'username' in user_profile:
                                post_fields['user_profile'][
                                    'id'] = user_profile['username']
                            picture = G.get_object(sender_id + '/picture')
                            if picture and picture['data'] and not picture.get(
                                    'is_silhouette', False):
                                post_fields['user_profile'][
                                    'profile_image_url'] = picture['url']
                            if 'location' in user_profile:
                                if isinstance(user_profile['location'], dict):
                                    if 'city' in user_profile['location']:
                                        post_fields['user_profile'][
                                            'location'] = user_profile[
                                                'location']['city']
                                    elif 'country' in user_profile['location']:
                                        post_fields['user_profile'][
                                            'location'] = user_profile[
                                                'location']['country']
                                elif isinstance(user_profile['location'],
                                                basestring):
                                    post_fields['user_profile'][
                                        'location'] = user_profile['location']
                            print "Pushed " + str(post_fields)
                            self.post_queue.put(post_fields)
Code example #14
def combine_and_split(channel_key_map=None,
                      max_track=400,
                      max_follow=5000,
                      fetch_missing_profiles=True):
    user_name_to_id, missing_profiles = build_uname_to_id_map(channel_key_map)

    if missing_profiles and fetch_missing_profiles:
        LOGGER.info(u"Fetching missing profiles: %s" % missing_profiles)
        from solariat_bottle.utils.tweet import TwitterApiWrapper
        from solariat_bottle.utils.oauth import get_twitter_oauth_handler
        from solariat_bottle.db.user_profiles.user_profile import UserProfile
        from solariat_bottle.daemons.helpers import parse_user_profile
        try:
            api = TwitterApiWrapper.make_api(get_twitter_oauth_handler())
            for user in api.lookup_users(screen_names=missing_profiles,
                                         include_entities=True):
                UserProfile.objects.upsert(
                    'Twitter', profile_data=parse_user_profile(user))
                uname = user.screen_name.lower()
                user_name_to_id[uname] = str(user.id)
                missing_profiles.discard(uname)
        except Exception:
            LOGGER.exception(u"Could not fetch missing profiles: %s" % missing_profiles)

    if missing_profiles:
        LOGGER.warn(u'Missing UserProfiles. '
                    u'User names won\'t be tracked: %s' % missing_profiles)

    # regroup by postfilter entry
    group_by_entry = defaultdict(lambda: {
        'accounts': set(),
        'channels': set()
    })
    for channel, key_map in channel_key_map.iteritems():
        for entry_type, entries in key_map.iteritems():
            if entry_type == 'SKIPWORD':
                continue

            for entry in entries:
                entry_key = None
                if entry_type == 'USER_NAME':
                    uid = user_name_to_id.get(
                        preprocess_keyword(entry, strip_special_chars=True))
                    if not uid:
                        LOGGER.warn(u'user id not found for %s' % entry)
                        continue
                    entry_key = ('USER_ID', uid)
                if entry_type == 'USER_ID' or entry_type == 'KEYWORD':
                    updated_entry = preprocess_keyword(entry)
                    if not updated_entry:
                        LOGGER.warn('Skipped keyword %s' % entry)
                        continue
                    else:
                        entry_key = (entry_type, updated_entry)

                group_by_entry[entry_key]['channels'].add(channel)
                group_by_entry[entry_key]['accounts'].add(channel.account)

    def _minimize(group_by_entry):
        """if there is keyword without [@#] then remove same keywords with @#
        and merge channels"""
        result = defaultdict(lambda: {'accounts': set(), 'channels': set()})
        for item, channels_and_accounts in group_by_entry.iteritems():
            filter_type, value = item
            if filter_type == 'KEYWORD':
                clean_value = value.lstrip('#').lstrip('@')
                key = (filter_type, clean_value)
                if key in group_by_entry:
                    result[key]['accounts'].update(
                        channels_and_accounts['accounts'])
                    result[key]['channels'].update(
                        channels_and_accounts['channels'])
                else:
                    result[item] = channels_and_accounts
            else:
                result[item] = channels_and_accounts
        return result

    def _iter_parts(group_by_entry):
        from itertools import izip_longest, ifilter

        def _filter(entry_type):
            return ifilter(
                lambda ((filter_type, _1), _2): filter_type == entry_type,
                group_by_entry.iteritems())

        for keywords_data, user_ids_data in izip_longest(
                partition(_filter('KEYWORD'), max_track),
                partition(_filter('USER_ID'), max_follow),
                fillvalue=[]):
            merged_channels = set()
            merged_accounts = set()
            keywords = []
            user_ids = []
            for item, accounts_and_channels in chain(keywords_data,
                                                     user_ids_data):
                filter_type, value = item
                merged_channels.update(accounts_and_channels['channels'])
                merged_accounts.update(accounts_and_channels['accounts'])
                if filter_type == 'KEYWORD':
                    keywords.append(value)
                elif filter_type == 'USER_ID':
                    user_ids.append(value)

            yield keywords, user_ids, merged_accounts, merged_channels

    return list(_iter_parts(_minimize(group_by_entry)))
Code example #15
File: timeline_request.py  Project: princez1214/flask
    def execute_request(self, failfast=False):
        import time
        import datetime

        retry_attempts = 0
        max_retry_attempts = self.config.get('retry_count', 10)
        min_delay = self.config.get('min_retry_delay', 60)  # 1 minute
        incr = 2
        max_delay = self.config.get('max_retry_delay', 1800)  # 30 minutes
        delay = min_delay
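        # Retries back off exponentially: the delay doubles each attempt
        # (delay *= incr) and is capped at max_delay.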

        method = getattr(self.api, self.method)
        exc = None
        _start = datetime.datetime.utcnow()

        while retry_attempts < max_retry_attempts:
            try:
                params = self.get_method_params()
                if not params:
                    LOGGER.warn(u"%s.%s got no params. Filters were: %s" %
                                (self.api, self.method, self.filters))
                    self.result = []
                else:
                    LOGGER.info(
                        u"Executing %s.%s with params: %s\nFilters: %s" %
                        (self.api, self.method, dumps(params),
                         dumps(self.filters)))
                    self.result = self.parse_response(method(**params))
                    self.filtered_result = filter(self._filter_tweet,
                                                  self.result)
                    self.filters.update(
                        max_id=self.max_id)  # update filters with next max_id
            except TweepError as e:
                exc = e
                # non-rate-limit error during performing request or parsing response;
                # rate-limit errors with 420, 429 statuses are handled by tweepy
                LOGGER.error(e, exc_info=True)
                # search api may respond with {"error": "Sorry, your query is too complex. Please reduce complexity and try again."}
                if "query is too complex" in unicode(e):
                    break

                retry_attempts += 1
                time.sleep(delay)
                delay = min(max_delay, delay * incr)
            except TwitterApiRateLimitError as e:
                LOGGER.debug(
                    '[execute_request] rate limits, waiting %s seconds',
                    e.wait_for)
                self.subscriber.aggregate_state(
                    self, {'wait_rate_limit_reset': e.wait_for})
                raise
            else:
                exc = None  # this attempt succeeded; discard any earlier error
                LOGGER.debug('[execute_request] len(self.result)=%s',
                             len(self.result))
                if len(self.result) == 0 or self.filters_fulfilled():
                    self._done = True
                    self.subscriber.aggregate_state(self, {'finished': True})
                else:
                    self.subscriber.aggregate_state(self,
                                                    {'running': self.progress})
                break

        _elapsed = datetime.datetime.utcnow() - _start
        if exc is not None:
            LOGGER.error(u"Could not retrieve results from twitter after %s" %
                         _elapsed)
            self.subscriber.aggregate_state(self, {'failed': str(exc)})
            if failfast is True:
                raise exc
            else:
                self.result = []
                self._done = True
Code example #16
File: auth_pool.py  Project: princez1214/flask
 def sync(self):
     if self._in_use:
         LOGGER.warn(u"Sync auth pool while in use {}".format(self._in_use))
         self._in_use = {}
     for auth in AuthPool.get_auth_pool():
         self.put(auth)