コード例 #1
0
  def postprocess_activity(self, activity):
    """Does source-independent post-processing of an activity.

    Right now just populates the title field, and only when the activity has
    an object and no title yet.

    Args:
      activity: activity dict

    Returns:
      the activity dict, passed through util.trim_nulls(), with title
      populated when it could be generated
    """
    activity = util.trim_nulls(activity)
    # maps object type to human-readable name to use in title
    TYPE_DISPLAY_NAMES = {'image': 'photo', 'product': 'gift'}

    # maps verb to human-readable verb
    DISPLAY_VERBS = {'like': 'likes', 'listen': 'listened to',
                     'play': 'watched', 'read': 'read', 'give': 'gave'}

    actor_name = self.actor_name(activity.get('actor'))
    obj = activity.get('object')

    if obj and not activity.get('title'):
      # verb is optional: a missing verb is handled like an unmapped one
      # instead of raising KeyError
      verb = DISPLAY_VERBS.get(activity.get('verb'))
      obj_name = obj.get('displayName')
      if obj_name and not verb:
        # no display verb to build a sentence with, so reuse the object's name
        activity['title'] = obj_name
      else:
        app = activity.get('generator', {}).get('displayName')
        obj_type = TYPE_DISPLAY_NAMES.get(obj.get('objectType'), 'unknown')
        name = obj_name if obj_name else 'a %s' % obj_type
        app = ' on %s' % app if app else ''
        activity['title'] = '%s %s %s%s.' % (actor_name, verb or 'posted',
                                             name, app)

    return util.trim_nulls(activity)
コード例 #2
0
ファイル: jsonfeed.py プロジェクト: snarfed/granary
def jsonfeed_to_activities(jsonfeed):
  """Converts a JSON feed to ActivityStreams activities and actor.

  Args:
    jsonfeed: dict, JSON Feed data

  Returns:
    (activities, actor) tuple, where activities and actor are both
    ActivityStreams object dicts

  Raises:
    ValueError, if jsonfeed isn't a valid JSON Feed dict, ie if the feed, an
    author, an item, or an attachment isn't a dict (or compatible), or if
    items or attachments can't be iterated
  """
  if not hasattr(jsonfeed, 'get'):
    raise ValueError('Expected dict (or compatible), got %s' % jsonfeed.__class__.__name__)

  def require_dict(value, name):
    # same duck-typed check as for the feed itself, applied to nested values
    if not hasattr(value, 'get'):
      raise ValueError('Expected %s to be dict (or compatible), got %s' %
                       (name, value.__class__.__name__))
    return value

  def as_list(value, name):
    # missing and null both mean "no entries"
    try:
      return list(value or [])
    except TypeError:
      raise ValueError('Expected %s to be a list, got %s' %
                       (name, value.__class__.__name__))

  # "or {}" also covers an explicit null author
  author = require_dict(jsonfeed.get('author') or {}, 'author')
  actor = {
    'objectType': 'person',
    'url': author.get('url'),
    'image': [{'url': author.get('avatar')}],
    'displayName': author.get('name'),
  }

  def attachment(jf):
    require_dict(jf, 'attachment')
    url = jf.get('url')
    # major part of the MIME type, eg 'audio' for 'audio/mpeg'
    media_type = (jf.get('mime_type') or '').split('/')[0]
    as1 = {
      'objectType': media_type,
      'title': jf.get('title'),
    }
    if media_type in ('audio', 'video'):
      as1['stream'] = {'url': url}
    else:
      as1['url'] = url
    return as1

  activities = []
  for item in as_list(jsonfeed.get('items'), 'items'):
    require_dict(item, 'item')
    item_author = require_dict(item.get('author') or {}, 'item author')
    activities.append({'object': {
      # items with a title become articles, everything else is a note
      'objectType': 'article' if item.get('title') else 'note',
      'title': item.get('title'),
      'summary': item.get('summary'),
      'content': item.get('content_html') or item.get('content_text'),
      'id': str(item.get('id') or ''),
      'published': item.get('date_published'),
      'updated': item.get('date_modified'),
      'url': item.get('url'),
      'image': [{'url': item.get('image')}],
      'author': {
        'displayName': item_author.get('name'),
        'image': [{'url': item_author.get('avatar')}],
      },
      'attachments': [attachment(a) for a in
                      as_list(item.get('attachments'), 'attachments')],
    }})

  return (util.trim_nulls(activities), util.trim_nulls(actor))
コード例 #3
0
  def postprocess_object(self, obj):
    """Does source-independent post-processing of an object.

    Fills in content for verbs that have an entry in RSVP_CONTENTS when the
    object has no content of its own, then derives displayName from the
    content when displayName is missing.

    Args:
      obj: object dict. Its content and displayName fields may be set.

    Returns:
      the result of util.trim_nulls() on the object
    """
    verb = obj.get('verb')
    rsvp_content = RSVP_CONTENTS.get(verb)
    content = obj.get('content')

    if rsvp_content and not content:
      if verb.startswith('rsvp-'):
        # wrap in a p-rsvp data element whose value is the part after "rsvp-"
        content = '<data class="p-rsvp" value="%s">%s</data>' % (
          verb.split('-')[1], rsvp_content)
      else:
        content = rsvp_content
      obj['content'] = content

    # nothing to derive a name from, or a name is already present
    if not content or obj.get('displayName'):
      return util.trim_nulls(obj)

    actor_name = self.actor_name(obj.get('author') or obj.get('actor'))
    if verb in ('like', 'share'):
      display_name = '%s %s' % (actor_name, content)
    elif rsvp_content:
      if verb == 'invite':
        # for invites the name comes from the object field instead
        actor_name = self.actor_name(obj.get('object'))
      display_name = '%s %s' % (actor_name, rsvp_content)
    else:
      display_name = util.ellipsize(content)

    obj['displayName'] = display_name
    return util.trim_nulls(obj)
コード例 #4
0
ファイル: microformats2.py プロジェクト: snarfed/granary
def object_urls(obj):
  """Returns an object's unique URLs, preserving order.

  Args:
    obj: object dict with optional url and urls fields, or a string

  Returns:
    obj itself, unchanged, if it is a string; otherwise the url field followed
    by the value of each urls entry, passed through util.trim_nulls() and
    uniquify()
  """
  if isinstance(obj, basestring):
    return obj

  candidates = [obj.get('url')]
  candidates.extend(entry.get('value') for entry in obj.get('urls', []))
  return uniquify(util.trim_nulls(candidates))
コード例 #5
0
ファイル: instagram.py プロジェクト: harixxy/granary
  def user_to_actor(self, user):
    """Builds an ActivityStreams actor from an Instagram user.

    Args:
      user: JSON object from the Instagram API

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if user
      is empty; only id and username if either of those is missing.
    """
    if not user:
      return {}

    user_id = user.get('id')
    username = user.get('username')
    actor = {'id': self.tag_uri(user_id or username), 'username': username}
    if not (user_id and username):
      # not enough data for a full profile; return the minimal actor as is
      return actor

    # prefer the user's own web site, fall back to their profile page
    profile_url = user.get('website') or self.user_url(username)

    actor['objectType'] = 'person'
    actor['displayName'] = user.get('full_name') or username
    actor['image'] = {'url': user.get('profile_picture')}
    actor['url'] = profile_url
    actor['description'] = user.get('bio')

    return util.trim_nulls(actor)
コード例 #6
0
  def user_to_actor(self, user):
    """Converts a Twitter user to an ActivityStreams actor.

    Args:
      user: dict, a decoded JSON Twitter user

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if user
      is empty or has no screen_name.
    """
    if not user:
      return {}

    username = user.get('screen_name')
    if not username:
      return {}

    url = user.get('url')
    if url:
      # swap the profile URL for its expanded form when an entity provides one
      for entity in user.get('entities', {}).get('url', {}).get('urls', []):
        expanded = entity.get('expanded_url')
        # .get() so that an entity without a url field is skipped, not fatal
        if entity.get('url') == url and expanded:
          url = expanded
    else:
      url = self.user_url(username)

    return util.trim_nulls({
      'displayName': user.get('name'),
      'image': {'url': user.get('profile_image_url')},
      'id': self.tag_uri(username),
      # numeric_id is our own custom field that always has the source's numeric
      # user id, if available.
      'numeric_id': user.get('id_str'),
      'published': self.rfc2822_to_iso8601(user.get('created_at')),
      'url': url,
      'location': {'displayName': user.get('location')},
      'username': username,
      'description': user.get('description'),
      })
コード例 #7
0
    def user_to_actor(self, user):
        """Builds an ActivityStreams actor from an Instagram user.

        Args:
          user: python_instagram.models.User or dict

        Returns:
          an ActivityStreams actor dict, ready to be JSON-encoded. Empty if
          user is empty; only id and username if either of those is missing.
        """
        if not user:
            return {}
        if isinstance(user, dict):
            # normalize dicts to the model class so attribute access works below
            user = python_instagram.models.User.object_from_dictionary(user)

        user_id = getattr(user, "id", None)
        username = getattr(user, "username", None)
        actor = {"id": self.tag_uri(user_id or username), "username": username}
        if not (user_id and username):
            return actor

        # prefer the user's own web site, fall back to their profile page
        website = getattr(user, "website", None)
        profile_url = website or ("http://instagram.com/" + username)

        actor["objectType"] = "person"
        actor["displayName"] = user.full_name
        actor["image"] = {"url": user.profile_picture}
        actor["url"] = profile_url
        actor["description"] = getattr(user, "bio", None)

        return util.trim_nulls(actor)
コード例 #8
0
  def original_post_discovery(activity):
    """Discovers original post links and stores them as tags, in place.

    This is a variation on http://indiewebcamp.com/original-post-discovery . It
    differs in that it finds multiple candidate links instead of one, and it
    doesn't bother looking for MF2 (etc) markup because the silos don't let you
    input it.

    Candidate links come from three places: links in the content,
    permashortcitations in the content, and article attachments.

    Args:
      activity: activity dict. Article tags are appended to the tags field of
        its object, or of the activity itself if it has no object.

    Returns:
      the same activity dict
    """
    obj = activity.get('object') or activity
    # "or ''" also covers an explicit null content field
    content = (obj.get('content') or '').strip()

    # Permashortcitations are short references to canonical copies of a given
    # (usually syndicated) post, of the form (DOMAIN PATH). Details:
    # http://indiewebcamp.com/permashortcitation
    pscs = set(match.expand(r'http://\1/\2')
               for match in Source._PERMASHORTCITATION_RE.finditer(content))

    # .get() so that attachments without an objectType are skipped rather than
    # raising KeyError
    attachments = set(a.get('url') for a in obj.get('attachments', [])
                      if a.get('objectType') == 'article')
    urls = util.trim_nulls(util.extract_links(content) | attachments | pscs)
    obj.setdefault('tags', []).extend({'objectType': 'article', 'url': u}
                                      for u in urls)

    return activity
コード例 #9
0
ファイル: facebook.py プロジェクト: harixxy/granary
  def user_to_actor(self, user):
    """Builds an ActivityStreams actor from a Facebook user or page.

    Args:
      user: dict, a decoded JSON Facebook user or page

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if user
      is empty or has neither a username nor an id.
    """
    if not user:
      return {}

    user_id = user.get('id')
    username = user.get('username')
    handle = username or user_id
    if not handle:
      return {}

    # FB only returns the type field if you fetch the object with ?metadata=1
    # https://developers.facebook.com/docs/graph-api/using-graph-api/v2.2#introspection
    object_type = 'page' if user.get('type') == 'page' else 'person'

    actor = {
      'objectType': object_type,
      'displayName': user.get('name') or username,
      'id': self.tag_uri(handle),
      'updated': util.maybe_iso8601_to_rfc3339(user.get('updated_time')),
      'username': username,
      'description': user.get('bio') or user.get('description'),
      'summary': user.get('about'),
    }

    if util.is_int(user_id):
      # numeric_id is our own custom field that always has the source's numeric
      # user id, if available.
      actor['numeric_id'] = user_id
      # Graph API picture endpoint. The original code notes that "facebook
      # implements this as a 302 redirect", presumably about this URL.
      actor['image'] = {
        'url': 'https://graph.facebook.com/v2.2/%s/picture?type=large' % user_id,
      }

    # web site links first, then the link field, then the profile URL.
    # extract_links uniquifies and preserves order
    links = (util.extract_links(user.get('website'))
             or util.extract_links(user.get('link'))
             or [self.user_url(handle)])
    actor['url'] = links[0]
    if len(links) > 1:
      actor['urls'] = [{'value': link} for link in links]

    place = user.get('location')
    if place:
      actor['location'] = {
        'id': place.get('id'),
        'displayName': place.get('name'),
      }

    return util.trim_nulls(actor)
コード例 #10
0
ファイル: facebook.py プロジェクト: harixxy/granary
  def urlopen_batch_full(self, requests):
    """Sends a batch of multiple API calls using Facebook's batch API.

    Similar to urlopen_batch(), but the requests arg and return value are dicts
    with headers, HTTP status code, etc. Only raises urllib2.HTTPError if the
    outer batch request itself returns an HTTP error.

    https://developers.facebook.com/docs/graph-api/making-multiple-requests

    Args:
      requests: sequence of dict requests in Facebook's batch format, except
      that headers is a single dict, not a list of dicts. The dicts are not
      modified; they're copied before being converted.

        [{'relative_url': 'me/feed',
          'headers': {'ETag': 'xyz', ...},
         },
         ...
        ]

    Returns: sequence of dict responses in Facebook's batch format, except that
      body is JSON-decoded if possible, and headers is a single dict, not a list
      of dicts.

      [{'code': 200,
        'headers': {'ETag': 'xyz', ...},
        'body': {...},
       },
       ...
      ]

    """
    batch = []
    for req in requests:
      # shallow copy so that the caller's dicts keep their dict-style headers
      # and can be reused for another call
      req = dict(req)
      req.setdefault('method', 'GET')
      if 'headers' in req:
        # Facebook wants headers as a list of name/value pairs
        req['headers'] = [{'name': n, 'value': v}
                          for n, v in req['headers'].items()]
      batch.append(req)

    data = 'batch=' + json.dumps(util.trim_nulls(batch),
                                 separators=(',', ':'))  # no whitespace
    resps = self.urlopen('', data=data)

    for resp in resps:
      if 'headers' in resp:
        # collapse the list of name/value pairs back into a single dict
        resp['headers'] = {h['name']: h['value'] for h in resp['headers']}

      body = resp.get('body')
      if body:
        try:
          resp['body'] = json.loads(body)
        except (ValueError, TypeError):
          # deliberately best effort: leave bodies that aren't JSON untouched
          pass

    return resps
コード例 #11
0
ファイル: instagram.py プロジェクト: kylewm/granary
  def get_comment(self, comment_id, activity_id=None, activity_author_id=None):
    """Returns an ActivityStreams comment object.

    Fetches the media for activity_id and looks for the comment among its
    comments.

    Args:
      comment_id: string comment id
      activity_id: string activity id, optional
      activity_author_id: string activity author id. Ignored.

    Returns:
      the result of comment_to_object() for the first comment whose id is
      comment_id, or None if there is no such comment
    """
    media = util.trim_nulls(self.urlopen(API_MEDIA_URL % activity_id) or {})
    comments = media.get('comments', {}).get('data', [])

    found = next((c for c in comments if c.get('id') == comment_id), None)
    if found is None:
      return None
    return self.comment_to_object(found, activity_id, media.get('link'))
コード例 #12
0
ファイル: source.py プロジェクト: snarfed/granary
  def is_public(obj):
    """Returns True if the object is public, False if private, None if unknown.

    ...according to the Audience Targeting extension
    https://developers.google.com/+/api/latest/activities/list#collection

    Expects values generated by this library: objectType group, alias @public or
    @private.

    Also, important point: this defaults to true, ie public. Bridgy depends on
    that and prunes the to field from stored activities in Response objects (in
    bridgy/util.prune_activity()). If the default here ever changes, be sure to
    update Bridgy's code.

    Args:
      obj: object or activity dict. The to field is read from it, or from its
        object field if it has none of its own.
    """
    recipients = obj.get('to') or obj.get('object', {}).get('to') or []
    aliases = util.trim_nulls([r.get('alias') for r in recipients])
    object_types = util.trim_nulls([r.get('objectType') for r in recipients])

    if '@public' in aliases:
      return True
    if 'unknown' in object_types:
      return None
    # any other alias means a restricted audience; no aliases at all falls
    # through to the public default
    return not aliases
コード例 #13
0
ファイル: twitter.py プロジェクト: kylewm/granary
  def user_to_actor(self, user):
    """Builds an ActivityStreams actor from a Twitter user.

    Args:
      user: dict, a decoded JSON Twitter user

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if the
      user has no screen_name.
    """
    username = user.get('screen_name')
    if not username:
      return {}

    # collect expanded links from the url field's entities first, then from
    # the description's
    entities = user.get('entities', {})
    expanded = []
    for field in ('url', 'description'):
      for entity in entities.get(field, {}).get('urls', []):
        expanded.append(entity.get('expanded_url'))
    urls = util.trim_nulls(expanded)

    if urls:
      url = urls[0]
    else:
      url = user.get('url') or self.user_url(username)

    # NOTE(review): the formatted PROFILE_PICTURE_URL looks like it is always
    # non-empty, which would make the profile_image_url* fallbacks unreachable.
    # Confirm against the constant's definition.
    image = PROFILE_PICTURE_URL % username
    if not image:
      image = (user.get('profile_image_url_https') or
               user.get('profile_image_url'))
    if image:
      # remove _normal for a ~256x256 avatar rather than ~48x48
      image = image.replace('_normal.', '.', 1)

    actor = {
      'objectType': 'person',
      'displayName': user.get('name') or username,
      'image': {'url': image},
      'id': self.tag_uri(username),
      # numeric_id is our own custom field that always has the source's numeric
      # user id, if available.
      'numeric_id': user.get('id_str'),
      'published': self.rfc2822_to_iso8601(user.get('created_at')),
      'url': url,
      'urls': [{'value': u} for u in urls],
      'location': {'displayName': user.get('location')},
      'username': username,
      'description': user.get('description'),
    }
    return util.trim_nulls(actor)
コード例 #14
0
ファイル: instagram.py プロジェクト: kylewm/granary
  def get_actor(self, user_id=None):
    """Fetches a user and returns them as a JSON ActivityStreams actor dict.

    Args:
      user_id: string id or username. Defaults to 'self', ie the current user.

    Returns:
      the result of user_to_actor() for the fetched user

    Raises: InstagramAPIError
    """
    target = 'self' if user_id is None else user_id
    # an empty API response is converted like an empty user
    raw_user = self.urlopen(API_USER_URL % target) or {}
    return self.user_to_actor(util.trim_nulls(raw_user))
コード例 #15
0
ファイル: source.py プロジェクト: harixxy/granary
    def postprocess_activity(self, activity):
        """Does source-independent post-processing of an activity.

        Right now just populates the title field, and only when the activity
        has an object and no title yet.

        Args:
          activity: activity dict

        Returns:
          the activity dict, passed through util.trim_nulls(), with title
          populated when it could be generated
        """
        activity = util.trim_nulls(activity)
        # maps object type to human-readable name to use in title
        TYPE_DISPLAY_NAMES = {"image": "photo", "product": "gift"}

        # maps verb to human-readable verb
        DISPLAY_VERBS = {
            "give": "gave",
            "like": "likes",
            "listen": "listened to",
            "play": "watched",
            "read": "read",
            "share": "shared",
        }

        actor_name = self.actor_name(activity.get("actor"))
        obj = activity.get("object")

        if obj and not activity.get("title"):
            # verb is optional: a missing verb is handled like an unmapped one
            # instead of raising KeyError
            verb = DISPLAY_VERBS.get(activity.get("verb"))
            obj_name = obj.get("displayName")
            obj_type = TYPE_DISPLAY_NAMES.get(obj.get("objectType"))
            if obj_name and not verb:
                # no display verb to build a sentence with, so reuse the name
                activity["title"] = obj_name
            elif verb and (obj_name or obj_type):
                # in this branch verb is set and at least one of obj_name and
                # obj_type is too, so no fallback values are needed
                app = activity.get("generator", {}).get("displayName")
                name = obj_name or "a %s" % obj_type
                app = " on %s" % app if app else ""
                activity["title"] = "%s %s %s%s." % (actor_name, verb, name, app)

        return util.trim_nulls(activity)
コード例 #16
0
ファイル: source.py プロジェクト: snarfed/granary
  def postprocess_object(obj):
    """Does source-independent post-processing of an object.

    Populates location.position based on latitude and longitude, unless
    position is already set.

    Args:
      obj: object dict. Its location dict, if any, is modified in place.

    Returns:
      the result of util.trim_nulls() on the object
    """
    loc = obj.get('location')
    if loc and not loc.get('position'):
      lat = loc.get('latitude')
      lon = loc.get('longitude')
      # 0 is a valid coordinate (equator, prime meridian), so a plain
      # truthiness test would wrongly skip it. Other empty values still count
      # as missing.
      if all(v or v == 0 for v in (lat, lon)):
        # ISO 6709 location string. details: http://en.wikipedia.org/wiki/ISO_6709
        loc['position'] = '%0+10.6f%0+11.6f/' % (lat, lon)

    return util.trim_nulls(obj)
コード例 #17
0
ファイル: source.py プロジェクト: harixxy/granary
    def postprocess_object(self, obj):
        """Does source-independent post-processing of an object.

        * populates location.position based on latitude and longitude, unless
          position is already set

        Args:
          obj: object dict. Its location dict, if any, is modified in place.

        Returns:
          the result of util.trim_nulls() on the object
        """
        loc = obj.get("location")
        if loc and not loc.get("position"):
            lat = loc.get("latitude")
            lon = loc.get("longitude")
            # 0 is a valid coordinate (equator, prime meridian), so a plain
            # truthiness test would wrongly skip it. Other empty values still
            # count as missing.
            if all(v or v == 0 for v in (lat, lon)):
                # ISO 6709 location string. details: http://en.wikipedia.org/wiki/ISO_6709
                loc["position"] = "%+f%+f/" % (lat, lon)

        return util.trim_nulls(obj)
コード例 #18
0
ファイル: twitter.py プロジェクト: kylewm/granary
  def fetch_mentions(self, username, min_id=None):
    """Fetches a user's @-mentions.

    Tries to only include explicit mentions, not mentions automatically created
    by @-replying. See the get_activities() docstring for details.

    Args:
      username: string
      min_id: only return results with ids greater than this

    Returns:
      list of the search result dicts that are considered mentions
    """
    # search for tweets that contain @username
    search_url = API_SEARCH_URL % {
      'q': urllib.quote_plus('@' + username),
      'count': 100,
    }
    if min_id is not None:
      search_url = util.add_query_params(search_url, {'since_id': min_id})
    candidates = self.urlopen(search_url)['statuses']

    # look up the tweets that the candidates reply to (if any), keyed by id
    parent_ids = util.trim_nulls(
      [tweet.get('in_reply_to_status_id_str') for tweet in candidates])
    parents = {}
    for parent in self.urlopen(API_LOOKUP_URL % ','.join(parent_ids)):
      parents[parent.get('id_str')] = parent

    mentions = []
    for tweet in candidates:
      # the user's own tweets and retweets never count as mentions
      own = tweet.get('user', {}).get('screen_name') == username
      if own or tweet.get('retweeted_status'):
        continue

      parent = parents.get(tweet.get('in_reply_to_status_id_str'))
      if parent:
        # a reply to a tweet that was written by the user, or that already
        # mentioned them, isn't treated as an explicit mention
        parent_author = parent.get('user', {}).get('screen_name')
        parent_mentions = [u.get('screen_name') for u in
                           parent.get('entities', {}).get('user_mentions', [])]
        if username == parent_author or username in parent_mentions:
          continue

      mentions.append(tweet)

    return mentions
コード例 #19
0
  def get_actor(self, user_id=None):
    """Fetches a user and returns them as a JSON ActivityStreams actor dict.

    When self.scrape is set, the actor is taken from
    get_activities_response(); otherwise the user is fetched from the API and
    converted with user_to_actor().

    Args:
      user_id: string id or username. Defaults to 'self', ie the current user.
        Required when scraping.

    Raises: InstagramAPIError
    """
    if user_id is None:
      assert self.scrape is False, 'get_actor() requires user_id when scraping'
      user_id = 'self'

    if not self.scrape:
      raw_user = self.urlopen(API_USER_URL % user_id) or {}
      return self.user_to_actor(util.trim_nulls(raw_user))

    resp = self.get_activities_response(group_id=source.SELF, user_id=user_id)
    return resp.get('actor')
コード例 #20
0
  def user_to_actor(self, user):
    """Builds an ActivityStreams actor from a Facebook user.

    Args:
      user: dict, a decoded JSON Facebook user

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if user
      is empty or has neither a username nor an id.
    """
    if not user:
      return {}

    user_id = user.get('id')
    username = user.get('username')
    handle = username or user_id
    if not handle:
      return {}

    # web site first, then the link field, then the profile page
    profile_url = (user.get('website')
                   or user.get('link')
                   or 'http://facebook.com/' + handle)

    actor = {
      'displayName': user.get('name'),
      # facebook implements this as a 302 redirect
      'image': {
        'url': 'http://graph.facebook.com/%s/picture?type=large' % handle,
      },
      'id': self.tag_uri(handle),
      # numeric_id is our own custom field that always has the source's numeric
      # user id, if available.
      'numeric_id': user_id,
      'updated': util.maybe_iso8601_to_rfc3339(user.get('updated_time')),
      'url': profile_url,
      'username': username,
      'description': user.get('bio'),
    }

    place = user.get('location')
    if place:
      actor['location'] = {
        'id': place.get('id'),
        'displayName': place.get('name'),
      }

    return util.trim_nulls(actor)
コード例 #21
0
ファイル: flickr.py プロジェクト: singpolyma/granary
  def user_to_actor(self, resp):
    """Convert a Flickr user dict into an ActivityStreams actor.

    Also fetches the user's profile page, if it has one, to extract their
    rel-me links. This is best effort: if the fetch fails, a warning is logged
    and the actor is returned without url(s).

    Args:
      resp: dict, a Flickr response with the user in its person field

    Returns:
      an ActivityStreams actor dict, passed through util.trim_nulls()
    """
    person = resp.get('person', {})
    username = person.get('username', {}).get('_content')
    obj = util.trim_nulls({
      'objectType': 'person',
      'displayName': person.get('realname', {}).get('_content') or username,
      'image': {
        'url': self.get_user_image(person.get('iconfarm'),
                                   person.get('iconserver'),
                                   person.get('nsid')),
      },
      'id': self.tag_uri(username),
      # numeric_id is our own custom field that always has the source's numeric
      # user id, if available.
      'numeric_id': person.get('nsid'),
      'location': {
        'displayName': person.get('location', {}).get('_content'),
      },
      'username': username,
      'description': person.get('description', {}).get('_content'),
    })

    # fetch profile page to get url(s)
    profile_url = person.get('profileurl', {}).get('_content')
    if profile_url:
      try:
        logging.debug('fetching flickr profile page %s', profile_url)
        profile_resp = urllib2.urlopen(
          profile_url, timeout=appengine_config.HTTP_TIMEOUT)
        try:
          profile_json = mf2py.parse(doc=profile_resp, url=profile_url)
        finally:
          # don't leak the connection, even if parsing fails
          profile_resp.close()
        # personal site is likely the first non-flickr url
        urls = profile_json.get('rels', {}).get('me', [])
        obj['urls'] = [{'value': u} for u in urls]
        obj['url'] = next(
          (u for u in urls if not u.startswith('https://www.flickr.com/')),
          None)
      except urllib2.URLError:
        logging.warning('could not fetch user homepage %s', profile_url)

    # trim again: url and urls may have been set to empty values above
    return util.trim_nulls(obj)
コード例 #22
0
ファイル: flickr.py プロジェクト: snarfed/granary
  def user_to_actor(self, resp):
    """Builds an ActivityStreams actor from a Flickr user dict.

    Also fetches the user's profile page, if it has one, to extract their
    rel-me links. If the fetch raises URLError, a warning is logged and the
    actor is returned without url(s).

    Args:
      resp: dict, a Flickr response with the user in its person field

    Returns:
      the result of postprocess_object() on the actor dict
    """
    person = resp.get('person', {})

    def content_of(field):
      # Flickr wraps these values in dicts with a _content key
      return person.get(field, {}).get('_content')

    username = content_of('username')
    icon_url = self.get_user_image(
      person.get('iconfarm'), person.get('iconserver'), person.get('nsid'))

    actor = util.trim_nulls({
      'objectType': 'person',
      'displayName': content_of('realname') or username,
      'image': {'url': icon_url},
      'id': self.tag_uri(username),
      # numeric_id is our own custom field that always has the source's numeric
      # user id, if available.
      'numeric_id': person.get('nsid'),
      'location': {'displayName': content_of('location')},
      'username': username,
      'description': content_of('description'),
    })

    # fetch profile page to get url(s)
    profile_url = content_of('profileurl')
    if profile_url:
      try:
        page = util.urlopen(profile_url)
        parsed = mf2py.parse(doc=page, url=profile_url, img_with_alt=True)
        me_links = parsed.get('rels', {}).get('me', [])
        if me_links:
          actor['url'] = me_links[0]
        if len(me_links) > 1:
          actor['urls'] = [{'value': link} for link in me_links]
      except urllib_error.URLError:
        logging.warning('could not fetch user homepage %s', profile_url)

    return self.postprocess_object(actor)
コード例 #23
0
ファイル: instagram.py プロジェクト: kylewm/granary
  def user_to_actor(self, user):
    """Builds an ActivityStreams actor from an Instagram user.

    Args:
      user: JSON object from the Instagram API

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if user
      is empty; only id and username if either of those is missing.
    """
    if not user:
      return {}

    user_id = user.get('id')
    username = user.get('username')
    actor = {'id': self.tag_uri(user_id or username), 'username': username}
    if not (user_id and username):
      return actor

    # links from the web site field come first, then links found in the bio
    links = []
    for field in ('website', 'bio'):
      links = links + util.extract_links(user.get(field))

    if not links:
      # no links of their own, so fall back to the profile page
      actor['url'] = self.user_url(username)
    else:
      actor['url'] = links[0]
      if len(links) > 1:
        actor['urls'] = [{'value': link} for link in links]

    actor['objectType'] = 'person'
    actor['displayName'] = user.get('full_name') or username
    actor['image'] = {'url': user.get('profile_picture')}
    actor['description'] = user.get('bio')

    return util.trim_nulls(actor)
コード例 #24
0
  def user_to_actor(self, user):
    """Builds an ActivityStreams actor from an Instagram user.

    Args:
      user: python_instagram.models.User or dict

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded. Empty if user
      is empty; only id and username if either of those is missing.
    """
    if not user:
      return {}
    if isinstance(user, dict):
      # normalize dicts to the model class so attribute access works below
      user = python_instagram.models.User.object_from_dictionary(user)

    user_id = getattr(user, 'id', None)
    username = getattr(user, 'username', None)
    actor = {'id': self.tag_uri(user_id or username), 'username': username}
    if not (user_id and username):
      return actor

    # prefer the user's own web site, fall back to their profile page
    profile_url = getattr(user, 'website', None) or self.user_url(username)

    actor['objectType'] = 'person'
    actor['displayName'] = user.full_name
    actor['image'] = {'url': user.profile_picture}
    actor['url'] = profile_url
    actor['description'] = getattr(user, 'bio', None)

    return util.trim_nulls(actor)
コード例 #25
0
ファイル: jsonfeed.py プロジェクト: snarfed/granary
def activities_to_jsonfeed(activities, actor=None, title=None, feed_url=None,
                           home_page_url=None):
  """Renders ActivityStreams activities as a JSON Feed.

  Activities whose object (or the activity itself, if it has no object) has
  objectType 'person' are skipped.

  Args:
    activities: sequence of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed title. Falls back to the actor's name, then to
      'JSON Feed'.
    feed_url: the URL of the JSON Feed, if any. Included in the feed_url field.
    home_page_url: string, the home page URL. Falls back to the actor's URL.

  Returns:
    dict, JSON Feed data, ready to be JSON-encoded

  Raises:
    TypeError, if activities isn't iterable, or is a dict or string
  """
  try:
    iter(activities)
  except TypeError:
    raise TypeError('activities must be iterable')

  if isinstance(activities, (dict, basestring)):
    raise TypeError('activities may not be a dict or string')

  def first_image_url(as_obj):
    return util.get_first(as_obj, 'image', {}).get('url')

  def name_of(as_obj):
    return as_obj.get('displayName') or as_obj.get('username')

  def to_author(as_obj):
    """Converts an AS actor to a JSON Feed author dict."""
    return {
      'name': name_of(as_obj),
      'url': as_obj.get('url'),
      'avatar': first_image_url(as_obj),
    }

  def to_attachment(att):
    """Converts an AS attachment to a JSON Feed one, or None if unsupported."""
    media = util.get_first(att, 'stream') or util.get_first(att, 'image') or att
    url = media.get('url')
    mime = mimetypes.guess_type(url)[0] if url else None

    supported = att.get('objectType') in ATTACHMENT_TYPES
    if not supported:
      # fall back to the top-level MIME type guessed from the URL
      supported = mime and mime.split('/')[0] in ATTACHMENT_TYPES
    if not supported:
      return None

    return {
      'url': url or '',
      'mime_type': mime,
      'title': att.get('title'),
    }

  actor = actor or {}

  items = []
  for activity in activities:
    obj = activity.get('object') or activity
    if obj.get('objectType') == 'person':
      continue

    author = obj.get('author', {})
    content = microformats2.render_content(
      obj, include_location=True, render_attachments=True)
    obj_title = obj.get('title') or obj.get('displayName')

    item = {
      'id': obj.get('id') or obj.get('url'),
      'url': obj.get('url'),
      'image': first_image_url(obj),
      'title': obj_title if mf2util.is_name_a_title(obj_title, content) else None,
      'summary': obj.get('summary'),
      'content_html': content,
      'date_published': obj.get('published'),
      'date_modified': obj.get('updated'),
      'author': to_author(author),
      'attachments': [],
    }

    for att in obj.get('attachments', []):
      converted = to_attachment(att)
      if converted is not None:
        item['attachments'].append(converted)

    if not content:
      # marker for items without HTML content; survives trim_nulls below
      item['content_text'] = ''
    items.append(item)

  feed = {
    'version': 'https://jsonfeed.org/version/1',
    'title': title or name_of(actor) or 'JSON Feed',
    'feed_url': feed_url,
    'home_page_url': home_page_url or actor.get('url'),
    'author': to_author(actor),
    'items': items,
  }
  return util.trim_nulls(feed, ignore='content_text')
コード例 #26
0
    def template_vars(self, domain=None, url=None):
        """Generates WebFinger data for a domain.

        Fetches candidate pages (url, then url's home page, then the domain's
        home page), uses the first one that has a representative h-card, and
        builds the WebFinger data from that h-card, the page's Atom feed and
        PuSH hub links, and the domain's MagicKey.

        Args:
          domain: string. Required in practice despite the None default, since
            it's split on '.' unconditionally.
          url: string, optional. If provided, it and its home page are checked
            for a representative h-card before the domain's home page.

        Returns:
          dict, WebFinger data with null and empty values trimmed
        """
        logging.debug(f'Headers: {list(request.headers.items())}')

        if domain.split('.')[-1] in NON_TLDS:
            error(f"{domain} doesn't look like a domain", status=404)

        # find representative h-card. try url, then url's home page, then domain
        urls = [f'http://{domain}/']
        if url:
            urls = [url, urllib.parse.urljoin(url, '/')] + urls

        for candidate in urls:
            resp = common.requests_get(candidate)
            parsed = util.parse_html(resp)
            mf2 = util.parse_mf2(parsed, url=resp.url)
            # logging.debug(f'Parsed mf2 for {resp.url}: {json_dumps(mf2, indent=2)}')
            hcard = mf2util.representative_hcard(mf2, resp.url)
            if hcard:
                logging.info(
                    f'Representative h-card: {json_dumps(hcard, indent=2)}')
                break
        else:
            # NOTE(review): the code below uses hcard, so this presumably
            # raises/aborts the request; confirm against error()'s definition.
            error(
                f"didn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {resp.url}"
            )

        logging.info(f'Generating WebFinger data for {domain}')
        key = models.MagicKey.get_or_create(domain)
        props = hcard.get('properties', {})
        urls = util.dedupe_urls(props.get('url', []) + [resp.url])
        canonical_url = urls[0]

        # default to domain@domain, but prefer a custom username from an acct:
        # URL in the h-card if it's on this domain
        acct = f'{domain}@{domain}'
        for url in urls:
            if url.startswith('acct:'):
                urluser, urldomain = util.parse_acct_uri(url)
                if urldomain == domain:
                    acct = f'{urluser}@{domain}'
                    logging.info(f'Found custom username: acct:{acct}')
                    break

        # discover atom feed, if any
        atom = parsed.find('link',
                           rel='alternate',
                           type=common.CONTENT_TYPE_ATOM)
        if atom and atom['href']:
            atom = urllib.parse.urljoin(resp.url, atom['href'])
        else:
            atom = 'https://granary.io/url?' + urllib.parse.urlencode(
                {
                    'input': 'html',
                    'output': 'atom',
                    'url': resp.url,
                    'hub': resp.url,
                })

        # discover PuSH, if any. use the first rel=hub Link header; only fall
        # back to the default hub when the response doesn't advertise one.
        # (resetting to the default inside the loop would discard a discovered
        # hub whenever any other Link header followed it.)
        hub = 'https://bridgy-fed.superfeedr.com/'
        for link in resp.headers.get('Link', '').split(','):
            match = common.LINK_HEADER_RE.match(link)
            if match and match.group(2) == 'hub':
                hub = match.group(1)
                break

        # generate webfinger content
        data = util.trim_nulls({
            'subject':
            'acct:' + acct,
            'aliases':
            urls,
            'magic_keys': [{
                'value': key.href()
            }],
            'links':
            sum(([{
                'rel': 'http://webfinger.net/rel/profile-page',
                'type': 'text/html',
                'href': url,
            }] for url in urls if url.startswith("http")), []) +
            [{
                'rel': 'http://webfinger.net/rel/avatar',
                'href': get_text(url),
            } for url in props.get('photo', [])] + [
                {
                    'rel': 'canonical_uri',
                    'type': 'text/html',
                    'href': canonical_url,
                },

                # ActivityPub
                {
                    'rel': 'self',
                    'type': common.CONTENT_TYPE_AS2,
                    # WARNING: in python 2 sometimes request.host_url lost port,
                    # http://localhost:8080 would become just http://localhost. no
                    # clue how or why. pay attention here if that happens again.
                    'href': f'{request.host_url}{domain}',
                },
                {
                    'rel': 'inbox',
                    'type': common.CONTENT_TYPE_AS2,
                    'href': f'{request.host_url}{domain}/inbox',
                },

                # OStatus
                {
                    'rel': 'http://schemas.google.com/g/2010#updates-from',
                    'type': common.CONTENT_TYPE_ATOM,
                    'href': atom,
                },
                {
                    'rel': 'hub',
                    'href': hub,
                },
                {
                    'rel': 'magic-public-key',
                    'href': key.href(),
                },
                {
                    'rel': 'salmon',
                    'href': f'{request.host_url}{domain}/salmon',
                }
            ]
        })
        logging.info(f'Returning WebFinger data: {json_dumps(data, indent=2)}')
        return data
コード例 #27
0
ファイル: common.py プロジェクト: iitians/bridgy-fed
    def postprocess_as2(self, activity, target=None, key=None):
        """Prepare an AS2 object to be served or sent via ActivityPub.

        Args:
          activity: dict, AS2 object or activity. Modified in place.
          target: dict, AS2 object, optional. The target of activity's inReplyTo or
            Like/Announce/etc object, if any.
          key: MagicKey, optional. populated into publicKey field if provided.

        Returns:
          dict, the processed AS2 object. Actors (type Person) are returned
          as is, without null trimming. Articles and Notes are returned
          wrapped in a Create activity.
        """
        as_type = activity.get('type')

        # actor objects
        if as_type == 'Person':
            self.postprocess_as2_actor(activity)
            # key is optional, so only synthesize publicKey when we have one
            if key is not None and not activity.get('publicKey'):
                # underspecified, inferred from this issue and Mastodon's implementation:
                # https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
                # https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
                activity['publicKey'] = {
                    'id': activity.get('preferredUsername'),
                    'publicKeyPem': key.public_pem().decode(),
                }
            return activity

        for actor in (util.get_list(activity, 'attributedTo') +
                      util.get_list(activity, 'actor')):
            self.postprocess_as2_actor(actor)

        # inReplyTo: singly valued, prefer id over url
        target_id = target.get('id') if target else None
        in_reply_to = activity.get('inReplyTo')
        if in_reply_to:
            if target_id:
                activity['inReplyTo'] = target_id
            elif isinstance(in_reply_to, list):
                if len(in_reply_to) > 1:
                    logging.warning(
                        "AS2 doesn't support multiple inReplyTo URLs! "
                        'Only using the first: %s' % in_reply_to[0])
                activity['inReplyTo'] = in_reply_to[0]

            # Mastodon evidently requires a Mention tag for replies to generate a
            # notification to the original post's author. not required for likes,
            # reposts, etc. details:
            # https://github.com/snarfed/bridgy-fed/issues/34
            if target:
                for to in (util.get_list(target, 'attributedTo') +
                           util.get_list(target, 'actor')):
                    if isinstance(to, dict):
                        to = to.get('url') or to.get('id')
                    if to:
                        activity.setdefault('tag', []).append({
                            'type': 'Mention',
                            'href': to,
                        })

        # activity objects (for Like, Announce, etc): prefer id over url
        obj = activity.get('object')
        if obj:
            if isinstance(obj, dict) and not obj.get('id'):
                obj['id'] = target_id or obj.get('url')
            elif target_id and obj != target_id:
                activity['object'] = target_id

        # id is required for most things. default to url if it's not set.
        if not activity.get('id'):
            activity['id'] = activity.get('url')

        # TODO: find a better way to check this, sometimes or always?
        # removed for now since it fires on posts without u-id or u-url, eg
        # https://chrisbeckstrom.com/2018/12/27/32551/
        # assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))

        activity['id'] = self.redirect_wrap(activity.get('id'))
        activity['url'] = self.redirect_wrap(activity.get('url'))

        # copy image(s) into attachment(s). may be Mastodon-specific.
        # https://github.com/snarfed/bridgy-fed/issues/33#issuecomment-440965618
        #
        # get_list() so that a single, non-list image is appended whole instead
        # of extend() iterating over its dict keys or string characters.
        obj_or_activity = obj if isinstance(obj, dict) else activity
        obj_or_activity.setdefault('attachment', []).extend(
            util.get_list(obj_or_activity, 'image'))

        # cc public and target's author(s) and recipients
        # https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
        # https://w3c.github.io/activitypub/#delivery
        if as_type in as2.TYPE_TO_VERB or as_type in ('Article', 'Note'):
            recips = [AS2_PUBLIC_AUDIENCE]
            if target:
                recips += itertools.chain(*(util.get_list(target, field)
                                            for field in ('actor',
                                                          'attributedTo', 'to',
                                                          'cc')))
            activity['cc'] = util.dedupe_urls(
                util.get_url(recip) or recip.get('id') for recip in recips)

        # wrap articles and notes in a Create activity
        if as_type in ('Article', 'Note'):
            activity = {
                '@context': as2.CONTEXT,
                'type': 'Create',
                'object': activity,
            }

        return util.trim_nulls(activity)
コード例 #28
0
ファイル: instagram.py プロジェクト: fpcMotif/granary
    def get_activities_response(self,
                                user_id=None,
                                group_id=None,
                                app_id=None,
                                activity_id=None,
                                start_index=0,
                                count=0,
                                etag=None,
                                min_id=None,
                                cache=None,
                                fetch_replies=False,
                                fetch_likes=False,
                                fetch_shares=False,
                                fetch_events=False,
                                fetch_mentions=False,
                                search_query=None,
                                scrape=False,
                                cookie=None,
                                **kwargs):
        """Fetches posts and converts them to ActivityStreams activities.

        See method docstring in source.py for details. app_id is ignored.
        Supports min_id, but not ETag, since Instagram doesn't support it.

        http://instagram.com/developer/endpoints/users/#get_users_feed
        http://instagram.com/developer/endpoints/users/#get_users_media_recent

        Likes are always included, regardless of the fetch_likes kwarg. They come
        bundled in the 'likes' field of the API Media object:
        http://instagram.com/developer/endpoints/media/#

        Mentions are never fetched or included because the API doesn't support
        searching for them.
        https://github.com/snarfed/bridgy/issues/523#issuecomment-155523875

        Shares are never fetched or included since there is no share feature.

        Instagram only supports search over hashtags, so if search_query is set, it
        must begin with #.

        May populate a custom 'ig_like_count' property in media objects. (Currently
        only when scraping.)

        Args:
          scrape: if True, scrapes HTML from instagram.com instead of using the API.
            Populates the user's actor object in the 'actor' response field.
            Useful for apps that haven't yet been approved in the new permissions
            approval process. Requires activity_id, or user_id with
            group_id=@self, or cookie with group_id=@friends. Also supports
            passing a shortcode as activity_id as well as the internal API id.
            http://developers.instagram.com/post/133424514006/instagram-platform-update
          cookie: string, only used if scrape=True
          **: see :meth:`Source.get_activities_response`. Note that any extra
            keyword arguments are discarded: kwargs is rebound to a fresh dict
            of API query parameters below.

        Raises:
          InstagramAPIError
          NotImplementedError: if scraping is requested with an unsupported
            combination of arguments
          ValueError: if search_query doesn't begin with #
        """
        # scraping is enabled either per call or on the instance
        if scrape or self.scrape:
            if not (activity_id or (group_id == source.SELF and user_id) or
                    (group_id == source.FRIENDS and cookie)):
                raise NotImplementedError(
                    'Scraping only supports activity_id, user_id and group_id=@self, or cookie and group_id=@friends.'
                )
            return self._scrape(user_id=user_id,
                                activity_id=activity_id,
                                cookie=cookie,
                                fetch_extras=fetch_replies or fetch_likes,
                                cache=cache)

        # API defaults: the current user's friends feed
        if user_id is None:
            user_id = 'self'
        if group_id is None:
            group_id = source.FRIENDS

        if search_query:
            # strip the leading # since the search URL takes the bare tag
            if search_query.startswith('#'):
                search_query = search_query[1:]
            else:
                raise ValueError(
                    'Instagram only supports search over hashtags, so search_query must '
                    'begin with the # character.')

        # TODO: paging
        media = []
        # from here on, kwargs holds the API query parameters, not the caller's
        # extra keyword arguments
        kwargs = {}
        if min_id is not None:
            kwargs['min_id'] = min_id

        activities = []
        try:
            # choose the API endpoint. this is one chained conditional
            # expression, evaluated in order: a single media item if
            # activity_id is set, otherwise based on group_id: SELF => user's
            # media, ALL => popular, SEARCH => hashtag search, FRIENDS =>
            # user's feed, anything else => None.
            media_url = (
                API_MEDIA_URL %
                activity_id if activity_id else API_USER_MEDIA_URL %
                user_id if group_id == source.SELF else API_MEDIA_POPULAR_URL
                if group_id == source.ALL else API_MEDIA_SEARCH_URL %
                search_query if group_id == source.SEARCH else
                API_USER_FEED_URL if group_id == source.FRIENDS else None)
            assert media_url
            media = self.urlopen(util.add_query_params(media_url, kwargs))
            if media:
                # the single media endpoint's result is wrapped in a list so
                # that it's handled like the list endpoints' results
                if activity_id:
                    media = [media]
                activities += [
                    self.media_to_activity(m) for m in util.trim_nulls(media)
                ]

            if group_id == source.SELF and fetch_likes:
                # add the user's own likes
                liked = self.urlopen(
                    util.add_query_params(API_USER_LIKES_URL % user_id,
                                          kwargs))
                if liked:
                    user = self.urlopen(API_USER_URL % user_id)
                    activities += [
                        self.like_to_object(user, l['id'], l['link'])
                        for l in liked
                    ]

        except urllib2.HTTPError, e:
            code, body = util.interpret_http_exception(e)
            # instagram api should give us back a json block describing the
            # error. but if it's an error for some other reason, it probably won't
            # be properly formatted json.
            try:
                body_obj = json.loads(body) if body else {}
            except ValueError:
                body_obj = {}

            # not-found errors are logged and swallowed; everything else
            # propagates to the caller
            if body_obj.get('meta',
                            {}).get('error_type') == 'APINotFoundError':
                logging.exception(
                    body_obj.get('meta', {}).get('error_message'))
            else:
                raise e
        # NOTE(review): no return statement is visible after this point in this
        # excerpt, so the non-scrape path returns None as shown. the activities
        # list is presumably meant to be returned; confirm against the full file.
コード例 #29
0
def object_urls(obj):
  """Returns an object's unique URLs, preserving order.

  Collects the 'url' field first, then each 'value' in the 'urls' list, and
  drops null values and duplicates.
  """
  primary = [obj.get('url')]
  others = [u.get('value') for u in obj.get('urls', [])]
  non_null = util.trim_nulls(primary + others)
  return util.uniquify(non_null)
コード例 #30
0
def json_to_object(mf2, actor=None):
  """Builds an ActivityStreams object from microformats2 JSON.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.

  Returns:
    dict, ActivityStreams object. Empty if mf2 is empty or not a dict.
  """
  if not (mf2 and isinstance(mf2, dict)):
    return {}

  # shallow copy so that adding a default 'properties' doesn't touch the input
  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  first = first_props(props)

  rsvp = first.get('rsvp')
  rsvp_verb = None
  if rsvp:
    rsvp_verb = 'rsvp-%s' % rsvp

  if first.get('author'):
    author = json_to_object(first['author'])
  else:
    author = actor

  # mf2 post type => (ActivityStreams objectType, optional verb)
  as_types = {
    'rsvp': ('activity', rsvp_verb),
    'invite': ('activity', 'invite'),
    'repost': ('activity', 'share'),
    'like': ('activity', 'like'),
    'reply': ('comment', None),
    'person': ('person', None),
    'location': ('place', None),
    'note': ('note', None),
    'article': ('article', None),
  }

  declared_types = mf2.get('type') or []
  if 'h-geo' in declared_types or 'p-location' in declared_types:
    post_type = 'location'
  else:
    # an mf2 'photo' post is a note or article that *has* a photo, while an AS
    # 'photo' *is* the photo. so, run post type discovery on a copy without
    # the photo property to get the underlying type.
    # https://github.com/snarfed/bridgy/issues/702
    stripped = copy.deepcopy(mf2)
    stripped.get('properties', {}).pop('photo', None)
    post_type = mf2util.post_type_discovery(stripped)

  as_type, as_verb = as_types.get(post_type, (None, None))

  def absolute_urls(name):
    found = []
    for url in get_string_urls(props.get(name, [])):
      # skip relative and invalid URLs (mf2py gives absolute urls)
      if urlparse.urlparse(url).netloc:
        found.append({'url': url})
    return found

  raw_urls = props.get('url')
  urls = get_string_urls(raw_urls) if raw_urls else raw_urls

  obj = {
    'id': first.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': first.get('published', ''),
    'updated': first.get('updated', ''),
    'displayName': get_text(first.get('name')),
    'summary': get_text(first.get('summary')),
    'content': get_html(first.get('content')),
    'url': urls[0] if urls else None,
    'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
    'image': absolute_urls('photo'),
    'stream': absolute_urls('video'),
    'location': json_to_object(first.get('location')),
    'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
    'tags': [{'objectType': 'hashtag', 'displayName': cat}
             if isinstance(cat, basestring)
             else json_to_object(cat)
             for cat in props.get('category', [])],
  }

  # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  loc = interpreted.get('location') if interpreted else None
  if loc:
    place = obj['location']
    place['objectType'] = 'place'
    lat = loc.get('latitude')
    lng = loc.get('longitude')
    if lat and lng:
      try:
        place['latitude'] = float(lat)
        place['longitude'] = float(lng)
        # TODO fill in 'position', maybe using Source.postprocess_object?
      except ValueError:
        logging.warn(
          'Could not convert latitude/longitude (%s, %s) to decimal', lat, lng)

  if as_type != 'activity':
    obj['inReplyTo'] = [
      {'url': url} for url in get_string_urls(props.get('in-reply-to', []))]
    obj['author'] = author
  else:
    targets = []
    for field in ('like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
                  'invitee'):
      for target in props.get(field, []):
        if isinstance(target, dict):
          converted = json_to_object(target)
        else:
          converted = {'url': target}
        # redundant backcompat properties can repeat the same target
        if converted not in targets:
          targets.append(converted)
    obj['object'] = targets[0] if len(targets) == 1 else targets
    obj['actor'] = author

  return util.trim_nulls(obj)
コード例 #31
0
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  The input object is not modified.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string, the mf2 class that entries should be given (e.g.
      'h-cite' when parsing a reference to a foreign entry). defaults to
      'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON. Empty if obj is empty or not a dict.
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  # copy the list so that the extend() below doesn't modify the caller's
  # inReplyTo (or context.inReplyTo) list in place
  in_reply_tos = list(obj.get(
    'inReplyTo', obj.get('context', {}).get('inReplyTo', [])))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    # RSVPs and reactions are rendered as replies to their object(s)
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # TODO: more tags. most will be p-category?
  ret = {
    'type': (['h-card'] if obj_type == 'person'
             else ['h-card', 'p-location'] if obj_type == 'place'
             else [entry_class]),
    'properties': {
      'uid': [obj.get('id', '')],
      'name': [name],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      'photo': [image.get('url') for image in
                (util.get_list(obj, 'image') or util.get_list(primary, 'image'))],
      'video': [obj.get('stream', primary.get('stream', {})).get('url')],
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
          'value': xml.sax.saxutils.unescape(primary.get('content', '')),
          'html': render_content(primary, include_location=False,
                                 synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'latitude': primary.get('latitude'),
      'longitude': primary.get('longitude'),
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
    },
    'children': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                 for c in primary.get('attachments', [])
                 if c.get('objectType') in ('note', 'article')],
  }

  # hashtags and person tags
  tags = obj.get('tags', []) or util.get_first(obj, 'object', {}).get('tags', [])
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      cls = 'u-category h-card'
    elif tag.get('objectType') == 'hashtag':
      cls = 'u-category'
    else:
      continue
    ret['properties']['category'].append(object_to_json(tag, entry_class=cls))

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for verb, prop in ('like', 'like'), ('share', 'repost'):
    if obj_type == verb:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = util.get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == verb]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
コード例 #32
0
ファイル: instagram.py プロジェクト: fpcMotif/granary
    def html_to_activities(self, html):
        """Converts Instagram HTML to ActivityStreams activities.

        The input HTML may be from:

        * a user's feed, eg https://www.instagram.com/ while logged in
        * a user's profile, eg https://www.instagram.com/snarfed/
        * a photo or video, eg https://www.instagram.com/p/BBWCSrfFZAk/

        Args:
          html: unicode string

        Returns:
          tuple, ([ActivityStreams activities], ActivityStreams viewer actor).
          ([], None) if the window._sharedData JSON script tag is missing or
          unterminated.
        """
        # extract JSON data blob
        script_start = '<script type="text/javascript">window._sharedData = '
        start = html.find(script_start)
        if start == -1:
            # Instagram sometimes returns 200 with incomplete HTML. often it stops at
            # the end of one of the <style> tags inside <head>. not sure why.
            logging.warning('JSON script tag not found!')
            return [], None

        # App Engine's Python 2.7.5 json module doesn't support unpaired surrogate
        # Unicode chars, so it chokes on some JSON docs. Monkey patch in simplejson
        # to fix that.
        # https://code.google.com/p/googleappengine/issues/detail?id=12823
        # http://stackoverflow.com/questions/15236742
        try:
            import simplejson
            json_module = simplejson
        except ImportError:
            json_module = json

        start += len(script_start)
        end = html.find(';</script>', start)
        if end == -1:
            # as mentioned above, Instagram sometimes returns 200 with incomplete HTML
            logging.warning('JSON script close tag not found!')
            return [], None
        data = util.trim_nulls(json_module.loads(html[start:end]))

        entry_data = data.get('entry_data', {})
        activities = []

        # find media
        medias = []
        profile_user = None
        for page in entry_data.get('FeedPage', []):
            medias.extend(
                page.get('feed', {}).get('media', {}).get('nodes', []))
        for page in entry_data.get('ProfilePage', []):
            profile_user = page.get('user', {})
            medias.extend(profile_user.get('media', {}).get('nodes', []))
        # PostPages without media contribute None here; trim_nulls below
        # filters those out
        medias.extend(
            page.get('media') for page in entry_data.get('PostPage', []))

        for media in util.trim_nulls(medias):
            activities.append(self._json_media_node_to_activity(media))

        # prefer the logged in viewer, fall back to the profile page's user
        actor = None
        viewer = data.get('config', {}).get('viewer') or profile_user or None
        if viewer:
            # rename fields to match the API's, and undo JSON-style escaped
            # slashes. '\\/' is a literal backslash followed by a slash; it's
            # spelled with an explicit double backslash since '\/' is an
            # invalid escape sequence.
            profile = viewer.get('profile_pic_url')
            if profile:
                viewer['profile_picture'] = profile.replace('\\/', '/')
            website = viewer.get('external_url')
            if website:
                viewer['website'] = website.replace('\\/', '/')
            viewer.setdefault('bio', viewer.get('biography'))
            actor = self.user_to_actor(viewer)
            if viewer.get('is_private'):
                actor['to'] = [{'objectType': 'group', 'alias': '@private'}]

        return activities, actor
コード例 #33
0
ファイル: instagram.py プロジェクト: fpcMotif/granary
    def _json_media_node_to_activity(self, media):
        """Converts Instagram HTML JSON media node to ActivityStreams activity.

        Args:
          media: dict, subset of Instagram HTML JSON representing a single photo
            or video. Modified in place: fields are added and renamed to match
            the API's.

        Returns:
          dict, ActivityStreams activity
        """
        # '\\/' below is a literal backslash followed by a slash, ie a
        # JSON-style escaped slash. it's spelled with an explicit double
        # backslash since '\/' is an invalid escape sequence.

        # preprocess to make its field names match the API's
        dims = media.get('dimensions', {})
        owner = media.get('owner', {})
        image_url = media.get('display_src') or media.get('display_url') or ''
        media.update({
            'link':
            self.media_url(media.get('code') or media.get('shortcode')),
            'user':
            owner,
            'created_time':
            media.get('date'),
            'caption': {
                'text': media.get('caption')
            },
            'images': {
                'standard_resolution': {
                    'url': image_url.replace('\\/', '/'),
                    'width': dims.get('width'),
                    'height': dims.get('height'),
                }
            },
            'users_in_photo':
            media.get('usertags', {}).get('nodes', []),
        })

        # combine the media and owner ids into a single id
        id = media.get('id')
        owner_id = owner.get('id')
        if id and owner_id:
            media['id'] = '%s_%s' % (id, owner_id)

        comments = media.setdefault('comments', {}).setdefault('nodes', [])
        likes = media.setdefault('likes', {}).setdefault('nodes', [])
        for obj in [media] + comments + likes:
            obj['user']['profile_picture'] = \
              obj['user'].get('profile_pic_url', '').replace('\\/', '/')

        media['comments']['data'] = comments
        for c in media['comments']['data']:
            c['from'] = c['user']
            c['created_time'] = c['created_at']
        media['likes']['data'] = [l['user'] for l in likes]

        if media.get('is_video'):
            media.update({
                'type': 'video',
                'videos': {
                    'standard_resolution': {
                        'url': media.get('video_url', '').replace('\\/', '/'),
                        'width': dims.get('width'),
                        'height': dims.get('height'),
                    }
                },
            })

        activity = self.media_to_activity(util.trim_nulls(media))
        obj = activity['object']
        obj['ig_like_count'] = media['likes'].get('count', 0)

        # multi-photo: replace the attachments with all of the children's
        children = media.get('edge_sidecar_to_children', {}).get('edges', [])
        if children:
            obj['attachments'] = list(itertools.chain.from_iterable(
                self._json_media_node_to_activity(
                    child.get('node'))['object']['attachments']
                for child in children))

        self.postprocess_object(obj)
        return super(Instagram, self).postprocess_activity(activity)
コード例 #34
0
def json_to_object(mf2):
  """Converts microformats2 JSON to an ActivityStreams object.

  Empty input and non-dict input both yield an empty dict. The non-dict case
  matters because this function calls itself on the author and location
  property values, which are not guaranteed to be nested mf2 objects (e.g. a
  plain string author name).

  Args:
    mf2: dict, decoded JSON microformats2 object

  Returns: dict, ActivityStreams object
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  props = mf2.get('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')
  rsvp_verb = 'rsvp-%s' % rsvp if rsvp else None
  author = json_to_object(prop.get('author'))

  # content is expected to be a dict with 'value' and/or 'html'. anything else
  # (presumably a bare string from a plain text content property; confirm
  # against first_props) is passed through unchanged instead of crashing.
  content = prop.get('content', {})
  if isinstance(content, dict):
    content = content.get('value') or content.get('html')

  # maps mf2 type to ActivityStreams objectType and optional verb. ordered by
  # priority.
  types = mf2.get('type', [])
  types_map = [
    ('h-as-rsvp', 'activity', rsvp_verb),
    ('h-as-repost', 'activity', 'share'),
    ('h-as-like', 'activity', 'like'),
    ('p-comment', 'comment', None),
    ('h-as-reply', 'comment', None),
    ('p-location', 'place', None),
    ('h-card', 'person', None),
    ]

  # fallback if none of the above mf2 types are found. maps property (if it
  # exists) to objectType and verb. ordered by priority.
  prop_types_map = [
    ('rsvp', 'activity', rsvp_verb),
    ('invitee', 'activity', 'invite'),
    ('repost', 'activity', 'share'),
    ('repost-of', 'activity', 'share'),
    ('like', 'activity', 'like'),
    ('like-of', 'activity', 'like'),
    ('in-reply-to', 'comment', None),
    ]

  for mf2_type, as_type, as_verb in types_map:
    if mf2_type in types:
      break  # found
  else:
    for p, as_type, as_verb in prop_types_map:
      if p in props:
        break
    else:
      # default
      as_type = 'note' if 'h-as-note' in types else 'article'
      as_verb = None

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'displayName': prop.get('name'),
    'content': content,
    'url': prop.get('url'),
    'image': {'url': prop.get('photo')},
    'location': json_to_object(prop.get('location')),
    'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
    }

  if as_type == 'activity':
    # activities point at their targets via 'object': the de-duplicated URLs
    # from the like/repost/reply properties, plus any invitees.
    urls = set(itertools.chain.from_iterable(get_string_urls(props.get(field, []))
        for field in ('like', 'like-of', 'repost', 'repost-of', 'in-reply-to')))
    objects = [{'url': url} for url in urls]
    objects += [json_to_object(i) for i in props.get('invitee', [])]
    obj.update({
        # a single target is unwrapped; zero or several stay a list
        'object': objects[0] if len(objects) == 1 else objects,
        'actor': author,
        })
  else:
    obj.update({
        'inReplyTo': [{'url': url} for url in get_string_urls(props.get('in-reply-to', []))],
        'author': author,
        })

  return util.trim_nulls(obj)
コード例 #35
0
def object_to_json(obj, ctx={}, trim_nulls=True):
    """Converts an ActivityStreams object to microformats2 JSON.

    The object property of RSVPs, invites, likes and shares may be either a
    single object or a list of objects; both forms are accepted.

    Args:
      obj: dict, a decoded JSON ActivityStreams object
      ctx: dict, a decoded JSON ActivityStreams context. Only read here, never
        modified.
      trim_nulls: boolean, whether to remove elements with null or empty values

    Returns: dict, decoded microformats2 JSON
    """
    if not obj:
        return {}

    types_map = {
        'article': ['h-entry', 'h-as-article'],
        'comment': ['h-entry', 'p-comment'],
        'like': ['h-entry', 'h-as-like'],
        'note': ['h-entry', 'h-as-note'],
        'person': ['h-card'],
        'place': ['h-card', 'p-location'],
        'share': ['h-entry', 'h-as-repost'],
        'rsvp-yes': ['h-entry', 'h-as-rsvp'],
        'rsvp-no': ['h-entry', 'h-as-rsvp'],
        'rsvp-maybe': ['h-entry', 'h-as-rsvp'],
        'invite': ['h-entry'],
    }
    obj_type = source.object_type(obj)
    types = types_map.get(obj_type, ['h-entry'])

    url = obj.get('url', '')
    content = obj.get('content', '')
    # TODO: extract snippet
    name = obj.get('displayName', obj.get('title'))
    summary = obj.get('summary')

    author = obj.get('author', obj.get('actor', {}))
    author = object_to_json(author, trim_nulls=False)
    if author:
        author['type'] = ['h-card']

    location = object_to_json(obj.get('location', {}), trim_nulls=False)
    if location:
        location['type'] = ['h-card', 'p-location']

    # concatenation builds a new list, so extending it below does not touch
    # the lists held by obj or ctx.
    in_reply_tos = obj.get('inReplyTo', []) + ctx.get('inReplyTo', [])
    if 'h-as-rsvp' in types and 'object' in obj:
        # an RSVP's target(s) are also what it is in reply to. the object may
        # be a list, so flatten it rather than nesting a list in the list.
        rsvp_targets = obj['object']
        if not isinstance(rsvp_targets, list):
            rsvp_targets = [rsvp_targets]
        in_reply_tos.extend(rsvp_targets)
    # TODO: more tags. most will be p-category?
    ret = {
        'type': types,
        'properties': {
            'uid': [obj.get('id', '')],
            'name': [name],
            'summary': [summary],
            'url': [url] + obj.get('upstreamDuplicates', []),
            'photo': [obj.get('image', {}).get('url', '')],
            'video': [obj.get('stream', {}).get('url')],
            'published': [obj.get('published', '')],
            'updated': [obj.get('updated', '')],
            'content': [{
                'value': xml.sax.saxutils.unescape(content),
                'html': render_content(obj, include_location=False),
            }],
            'in-reply-to':
            util.trim_nulls([o.get('url') for o in in_reply_tos]),
            'author': [author],
            'location': [location],
            'comment': [
                object_to_json(c, trim_nulls=False)
                for c in obj.get('replies', {}).get('items', [])
            ],
        }
    }

    # rsvp
    if 'h-as-rsvp' in types:
        ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
    elif obj_type == 'invite':
        # the invitee(s) live in the object property. a missing or empty
        # object converts to {}, which has no 'type' to extend, so skip it.
        invitees = obj.get('object') or []
        if not isinstance(invitees, list):
            invitees = [invitees]
        invitee_props = []
        for invitee in invitees:
            invitee_json = object_to_json(invitee, trim_nulls=False)
            if invitee_json:
                invitee_json['type'].append('p-invitee')
                invitee_props.append(invitee_json)
        ret['properties']['invitee'] = invitee_props
    # likes and reposts
    # http://indiewebcamp.com/like#Counterproposal
    for tag_type, prop in ('like', 'like'), ('share', 'repost'):
        if obj_type == tag_type:
            # The ActivityStreams spec says the object property should always be a
            # single object, but it's useful to let it be a list, e.g. when a like has
            # multiple targets, e.g. a like of a post with original post URLs in it,
            # which brid.gy does.
            objs = obj.get('object', [])
            if not isinstance(objs, list):
                objs = [objs]
            ret['properties'][prop] = ret['properties'][prop + '-of'] = \
                [o.get('url') for o in objs]
        else:
            # obj is not itself a like/share: collect likes/shares among its tags
            ret['properties'][prop] = [
                object_to_json(t, trim_nulls=False)
                for t in obj.get('tags', []) if source.object_type(t) == tag_type
            ]

    if trim_nulls:
        ret = util.trim_nulls(ret)
    return ret
コード例 #36
0
def object_to_json(obj, trim_nulls=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  The input object is not modified. The object property of RSVPs, invites,
  likes and shares may be either a single object or a list of objects.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values

  Returns: dict, decoded microformats2 JSON
  """
  if not obj:
    return {}

  types_map = {'article': ['h-entry', 'h-as-article'],
               'comment': ['h-entry', 'p-comment'],
               'like': ['h-entry', 'h-as-like'],
               'note': ['h-entry', 'h-as-note'],
               'person': ['h-card'],
               'place': ['h-card', 'p-location'],
               'share': ['h-entry', 'h-as-repost'],
               'rsvp-yes': ['h-entry', 'h-as-rsvp'],
               'rsvp-no': ['h-entry', 'h-as-rsvp'],
               'rsvp-maybe': ['h-entry', 'h-as-rsvp'],
               'invite': ['h-entry'],
               }
  obj_type = object_type(obj)
  types = types_map.get(obj_type, ['h-entry'])

  url = obj.get('url', '')
  content = obj.get('content', '')
  # TODO: extract snippet
  name = obj.get('displayName', obj.get('title', content))

  author = obj.get('author', obj.get('actor', {}))
  author = object_to_json(author, trim_nulls=False)
  if author:
    author['type'] = ['h-card']

  location = object_to_json(obj.get('location', {}), trim_nulls=False)
  if location:
    location['type'] = ['h-card', 'p-location']

  # copy so that adding the RSVP target(s) below doesn't modify the caller's
  # obj['inReplyTo'] list in place.
  in_reply_tos = list(obj.get('inReplyTo', []))
  if 'h-as-rsvp' in types and 'object' in obj:
    # an RSVP's target(s) are also what it is in reply to. the object may be a
    # list, so flatten it rather than nesting a list in the list.
    rsvp_targets = obj['object']
    if not isinstance(rsvp_targets, list):
      rsvp_targets = [rsvp_targets]
    in_reply_tos.extend(rsvp_targets)
  # TODO: more tags. most will be p-category?
  ret = {
    'type': types,
    'properties': {
      'uid': [obj.get('id', '')],
      'name': [name],
      'url': [url],
      'photo': [obj.get('image', {}).get('url', '')],
      'published': [obj.get('published', '')],
      'updated':  [obj.get('updated', '')],
      'content': [{
          'value': content,
          'html': render_content(obj),
          }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [author],
      'location': [location],
      'comment': [object_to_json(c, trim_nulls=False)
                  for c in obj.get('replies', {}).get('items', [])],
      }
    }

  # rsvp
  if 'h-as-rsvp' in types:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    # the invitee(s) live in the object property. a missing or empty object
    # converts to {}, which has no 'type' to extend, so skip it.
    invitees = obj.get('object') or []
    if not isinstance(invitees, list):
      invitees = [invitees]
    invitee_props = []
    for invitee in invitees:
      invitee_json = object_to_json(invitee, trim_nulls=False)
      if invitee_json:
        invitee_json['type'].append('p-invitee')
        invitee_props.append(invitee_json)
    ret['properties']['invitee'] = invitee_props
  # likes and reposts
  # http://indiewebcamp.com/like#Counterproposal
  for tag_type, prop in ('like', 'like'), ('share', 'repost'):
    if obj_type == tag_type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = obj.get('object', [])
      if not isinstance(objs, list):
        objs = [objs]
      ret['properties'][prop] = ret['properties'][prop + '-of'] = \
          [o.get('url') for o in objs]
    else:
      # obj is not itself a like/share: collect likes/shares among its tags
      ret['properties'][prop] = [object_to_json(t, trim_nulls=False)
                                 for t in obj.get('tags', [])
                                 if object_type(t) == tag_type]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
コード例 #37
0
def json_to_object(mf2):
    """Builds an ActivityStreams object from a microformats2 JSON item.

    Args:
      mf2: dict, decoded JSON microformats2 object

    Returns: dict, ActivityStreams object. Empty if mf2 is empty or not a dict.
    """
    if not (mf2 and isinstance(mf2, dict)):
        return {}

    props = mf2.get('properties', {})
    prop = first_props(props)
    rsvp = prop.get('rsvp')
    rsvp_verb = ('rsvp-%s' % rsvp) if rsvp else None
    author = json_to_object(prop.get('author'))
    types = mf2.get('type', [])

    # (mf2 type, ActivityStreams objectType, optional verb), highest priority
    # first.
    by_type = (
        ('h-as-rsvp', 'activity', rsvp_verb),
        ('h-as-repost', 'activity', 'share'),
        ('h-as-like', 'activity', 'like'),
        ('p-comment', 'comment', None),
        ('h-as-reply', 'comment', None),
        ('p-location', 'place', None),
        ('h-card', 'person', None),
    )

    # consulted only when no mf2 type above matched:
    # (property name, objectType, verb), highest priority first.
    by_prop = (
        ('rsvp', 'activity', rsvp_verb),
        ('invitee', 'activity', 'invite'),
        ('repost-of', 'activity', 'share'),
        ('like-of', 'activity', 'like'),
        ('in-reply-to', 'comment', None),
    )

    # first matching type wins, then first matching property, then the default
    found = next(((kind, verb) for key, kind, verb in by_type
                  if key in types), None)
    if found is None:
        found = next(((kind, verb) for key, kind, verb in by_prop
                      if key in props), None)
    if found is None:
        found = ('note' if 'h-as-note' in types else 'article', None)
    as_type, as_verb = found

    # keep only photo URLs that have a network location, i.e. drop relative
    # and invalid URLs (mf2py gives absolute urls)
    photos = []
    for photo_url in get_string_urls(props.get('photo', [])):
        if urlparse.urlparse(photo_url).netloc:
            photos.append(photo_url)
    first_photo = photos[0] if photos else None

    obj = {
        'id': prop.get('uid'),
        'objectType': as_type,
        'verb': as_verb,
        'published': prop.get('published', ''),
        'updated': prop.get('updated', ''),
        'displayName': get_text(prop.get('name')),
        'summary': get_text(prop.get('summary')),
        'content': get_html(prop.get('content')),
        'url': prop.get('url'),
        'image': {'url': first_photo},
        'location': json_to_object(prop.get('location')),
        'replies': {
            'items': [json_to_object(reply)
                      for reply in props.get('comment', [])],
        },
    }

    if as_type != 'activity':
        reply_urls = get_string_urls(props.get('in-reply-to', []))
        obj['inReplyTo'] = [{'url': target} for target in reply_urls]
        obj['author'] = author
        return util.trim_nulls(obj)

    # activities: targets are the de-duplicated URLs from the like/repost/reply
    # properties, followed by any invitees.
    target_fields = ('like', 'like-of', 'repost', 'repost-of', 'in-reply-to')
    per_field_urls = (get_string_urls(props.get(field, []))
                      for field in target_fields)
    urls = set(itertools.chain.from_iterable(per_field_urls))
    objects = ([{'url': target} for target in urls] +
               [json_to_object(invitee)
                for invitee in props.get('invitee', [])])

    # a single target is unwrapped; zero or several stay a list
    obj['object'] = objects[0] if len(objects) == 1 else objects
    obj['actor'] = author
    return util.trim_nulls(obj)