Ejemplo n.º 1
0
  def test_create_event_message(self):
    """Verify the envelope built by messages.create_event_message.

    For every service name in the list below, the header type must be
    '<service>.event' and the message body must echo back the author ids,
    the state, the event id, the raw JSON payload and the single link that
    was passed in.
    """
    author_id = 1234
    user_id = 'service_user_id'
    event_id = '987654321'
    link_service = 'instagram'
    link_event_id = '543216789'
    payload = {'key': 'value'}

    for service in ['facebook', 'twitter', 'instagram', 'foursquare', 'linkedin']:
      message = messages.create_event_message(
          service,
          author_id,
          messages.CURRENT_STATE,
          user_id,
          event_id,
          payload,
          [messages.create_event_link(link_service, link_event_id)])

      # header carries the service-qualified message type
      self.assertEqual(message['header']['type'], service + '.event')

      # scalar fields of the body, checked in a fixed order
      expected_fields = [
          ('tim_author_id', author_id),
          ('state', messages.CURRENT_STATE),
          ('service_author_id', user_id),
          ('service_event_id', event_id),
          ('service_event_json', payload),
      ]
      for field, expected in expected_fields:
        self.assertEqual(message['message'][field], expected)

      # the one link handed in must come back as the first link
      self.assertEqual(message['message']['links'][0]['service_id'], link_service)
      self.assertEqual(message['message']['links'][0]['service_event_id'], link_event_id)
  def fetch(self, service_id, service_author_id, service_event_id, callback):
    """Fetch the latest version of one media item and emit it as an event.

    Args:
      service_id: not used by this method.
      service_author_id: author's id on the service; used to look up the
        author/service mapping and passed through in the event.
      service_event_id: id of the media item; appended to the MEDIA_INFO
        path when building the request URL.
      callback: invoked once with the message returned by
        create_instagram_event.
    """
    author_map = self.get_author_service_map(service_author_id)

    # use the author's own token when present, else the configured fallback
    token = author_map.access_token or self.oauth_config['user1_access_token']
    query = urllib.urlencode({'access_token': token})

    # fetch latest version of event
    url = '{0}{1}{2}?{3}'.format(
        self.oauth_config['endpoint'], self.MEDIA_INFO, service_event_id, query)
    response = urllib2.urlopen(url)
    media = json_serializer.load(response)['data']

    interpreter = InstagramEventInterpreter(media, author_map, self.oauth_config)
    event_id = interpreter.get_id()

    # TODO - unclear if/why the link meta data should be included -- included here because
    #        relationships are not being properly maintained
    link = create_event_link(
        data_access.service.name_to_id('instagram'),
        '_{0}@{1}'.format(self.service_name, author_map.author_id))

    event = create_instagram_event(
        author_map.author_id,
        CURRENT_STATE,
        service_author_id,
        event_id,
        media,
        [link])
    callback(event)
  def fetch(self, service_author_id, callback):
    """Collect new Facebook feed posts, albums, photos and checkins for an author.

    Every collected item is wrapped with create_facebook_event and handed to
    callback.  Feed posts and checkins are screened with self.screen_event,
    and each of those two passes stops requesting pages once MAX_EVENTS items
    have been accepted.  Albums and photos are not passed through
    screen_event.

    Args:
      service_author_id: the author's id on Facebook; compared with each
        item's 'from' id and passed through in every event.
      callback: callable invoked once per event message.
    """

    super(FacebookEventCollector, self).fetch(service_author_id, callback)

    state = self.fetch_begin(service_author_id)

    self.fetch_log_info(state)

    asm = state['asm']

    def next_page_url(page_obj, current_url):
      # Return the paging 'next' link of page_obj, or None when there is no
      # such link or when it equals the URL that was just fetched (which
      # would otherwise make the caller loop on the same page).
      paging = page_obj['paging'] if 'paging' in page_obj else {}
      next_url = paging['next'] if 'next' in paging else None
      return next_url if next_url and next_url != current_url else None

    args = {'access_token': asm.access_token}

    # get only events since last update or past year depending on if this
    # is the first collection or not
    if asm.most_recent_event_timestamp:
      since = calendar.timegm((asm.most_recent_event_timestamp -
                               self.MOST_RECENT_OVERLAP).utctimetuple())
    else:
      since = calendar.timegm((datetime.utcnow() -
                               self.NEW_LOOKBACK_WINDOW).utctimetuple())
    args['since'] = since

    # fetch all new posts
    posts_url = unicode('{0}{1}?{2}').format(self.oauth_config['endpoint'],
                                             self.FEED_COLLECTION,
                                             urllib.urlencode(args))

    total_accepted = 0
    while posts_url and total_accepted < self.MAX_EVENTS:

      logging.debug('requesting: "%s"', posts_url)
      posts_obj = json_serializer.load(urllib2.urlopen(posts_url))

      # TODO loop termination on various constraints is not exact

      # for element in the feed
      for post in posts_obj['data']:

        # only interested in posts made by the user
        if post['from']['id'] != service_author_id:
          continue

        post_type = post.get('type', None)

        # if this is a status update and there is an action or the
        # user is tagged in the story keep it

        # TODO: check for user in story_tags is experimental

        if post_type == 'status':

          tagged = 'story_tags' in post and any(
              int(entity['id']) == int(service_author_id)
              for story_tag in post['story_tags'].itervalues()
              for entity in story_tag)

          if not post.get('actions') and not tagged:
            continue

        # skip photo and checkin posts.  they will get picked-up by their respective
        # processing below
        if post_type == 'photo' or post_type == 'checkin':
          continue

        interpreter = FacebookEventInterpreter(post, asm, self.oauth_config)
        if self.screen_event(interpreter, state):
          total_accepted = total_accepted + 1
          callback(create_facebook_event(asm.author_id, CURRENT_STATE, service_author_id, interpreter.get_id(), post))

      # setup for the next page (if any)
      posts_url = next_page_url(posts_obj, posts_url)

    # while posts

    # collect photos for all time if this is the first update; otherwise
    # only collect photos since the last update.  Setting since to None
    # and removing the 'since' property from the query args will collect
    # all photos
    if not asm.most_recent_event_timestamp:
      since = None
      del args['since']

    albums_url = unicode('{0}{1}?{2}').format(self.oauth_config['endpoint'],
                                              self.ALBUMS_COLLECTION,
                                              urllib.urlencode({'access_token': asm.access_token}))

    while albums_url:

      albums_obj = json_serializer.load(urllib2.urlopen(albums_url))

      for album in albums_obj.get('data', []):

        # skip photos posted to friend's walls
        if album['type'] == 'friends_walls':
          continue

        created_time = calendar.timegm(datetime.strptime(album['created_time'], "%Y-%m-%dT%H:%M:%S+0000").utctimetuple())
        updated_time = calendar.timegm(datetime.strptime(album['updated_time'], "%Y-%m-%dT%H:%M:%S+0000").utctimetuple())

        if since is None or created_time >= since or updated_time >= since:

          # set the type to 'album' so it will match what you get when it's directly
          # queried; also makes it easier for the event process to identify it
          album['type'] = 'album'

          # interpret the album itself so get_id() is the album's id, not
          # the id of whatever feed post was processed last
          interpreter = FacebookEventInterpreter(album, asm, self.oauth_config)

          # send event message
          callback(create_facebook_event(asm.author_id, CURRENT_STATE, service_author_id, interpreter.get_id(), album))

        # if

        album_id = album['id']

        # check for any new photos in the album
        photos_url = unicode('{0}{1}{2}?{3}').format(self.oauth_config['endpoint'],
                                                     album_id,
                                                     self.PHOTOS_COLLECTION,
                                                     urllib.urlencode(args))
        while photos_url:

          photos_obj = json_serializer.load(urllib2.urlopen(photos_url))

          for photo in photos_obj.get('data', []):

            photo['type'] = 'photo'

            # interpret the photo itself so the event id belongs to this photo
            interpreter = FacebookEventInterpreter(photo, asm, self.oauth_config)

            # event message, linked back to the containing album
            callback(create_facebook_event(
                  asm.author_id,
                  CURRENT_STATE,
                  service_author_id,
                  interpreter.get_id(),
                  photo,
                  [create_event_link(data_access.service.name_to_id('facebook'), album_id)]))

          # setup for the next page (if any)
          photos_url = next_page_url(photos_obj, photos_url)

        # while photos

      # setup for the next page (if any)
      albums_url = next_page_url(albums_obj, albums_url)

    # while albums

    # fetch all new checkins
    checkins_url = unicode('{0}{1}?{2}').format(self.oauth_config['endpoint'],
                                                self.CHECKIN_COLLECTION,
                                                urllib.urlencode(args))

    total_accepted = 0
    while checkins_url and total_accepted < self.MAX_EVENTS:

      checkins_obj = json_serializer.load(urllib2.urlopen(checkins_url))

      # TODO loop termination on various constraints is not exact

      # for element in the feed
      for checkin_obj in checkins_obj['data']:

        # filter checkins not directly from this user
        if checkin_obj['from']['id'] != service_author_id:
          continue

        # set the type to checkin.  When querying for checkins the
        # type property is missing
        checkin_obj['type'] = 'checkin'

        # screen and emit the checkin itself, not a stale feed post
        interpreter = FacebookEventInterpreter(checkin_obj, asm, self.oauth_config)

        if self.screen_event(interpreter, state):
          total_accepted = total_accepted + 1
          callback(create_facebook_event(asm.author_id, CURRENT_STATE, service_author_id, interpreter.get_id(), checkin_obj))

      # setup for the next page (if any); compare against the checkins URL
      # so a self-referencing 'next' link terminates this loop
      checkins_url = next_page_url(checkins_obj, checkins_url)

    # while checkins

    # terminate the fetch
    self.fetch_end(state)
    def fetch(self, service_author_id, callback):
        """Collect recent Instagram media for an author and emit one event per accepted post.

        Pages through the user-media endpoint starting at a minimum
        timestamp, screens each post with self.screen_event and hands every
        accepted post to callback as a create_instagram_event message.
        Paging stops when there is no next URL or once MAX_EVENTS posts have
        been accepted.

        Args:
            service_author_id: the author's id on the service; passed through
                in every event.
            callback: callable invoked once per accepted post.
        """
        super(InstagramEventCollector, self).fetch(service_author_id, callback)

        state = self.fetch_begin(service_author_id)
        self.fetch_log_info(state)
        asm = state["asm"]

        # with the author's own token query "self"; with the fallback token
        # the author has to be addressed explicitly by id
        own_token = asm.access_token
        if own_token:
            token = own_token
            media_path = self.USER_MEDIA.format("self")
        else:
            token = self.oauth_config["user1_access_token"]
            media_path = self.USER_MEDIA.format(asm.service_author_id)

        query = {"access_token": token, "count": self.PAGE_SIZE}

        # get only events since last update or within the look-back window,
        # depending on whether this is the first collection or not
        if asm.most_recent_event_timestamp:
            window_start = asm.most_recent_event_timestamp - self.MOST_RECENT_OVERLAP
        else:
            window_start = datetime.utcnow() - self.NEW_LOOKBACK_WINDOW
        query["min_timestamp"] = calendar.timegm(window_start.utctimetuple())

        # setup the url for fetching a page of posts
        url = "{0}{1}?{2}".format(self.oauth_config["endpoint"], media_path, urllib.urlencode(query))

        accepted = 0
        while url and accepted < self.MAX_EVENTS:

            page = json_serializer.load(urllib2.urlopen(url))

            # for element in the feed
            for post in page.get("data", []):

                interpreter = InstagramEventInterpreter(post, asm, self.oauth_config)

                if not self.screen_event(interpreter, state):
                    continue

                accepted += 1

                event_id = interpreter.get_id()
                link = create_event_link(
                    data_access.service.name_to_id("instagram"),
                    "_{0}@{1}".format(self.service_name, asm.author_id),
                )
                callback(
                    create_instagram_event(
                        asm.author_id,
                        CURRENT_STATE,
                        service_author_id,
                        event_id,
                        post,
                        [link],
                    )
                )

            # setup for the next page (if any)
            if "pagination" in page and "next_url" in page["pagination"]:
                url = page["pagination"]["next_url"]
            else:
                url = None

        # terminate the fetch
        self.fetch_end(state)