Ejemplo n.º 1
0
def convert_mf2util():
    """Parse the page named by the `url` query argument and return mf2util's view of it.

    Reads the `url`, `as-feed` and `op` request arguments. When `url` is
    present the page is parsed with mf2py and the result is returned through
    jsonify():

    * `op == 'post-type-discovery'`: only the discovered post type of the
      first h-entry/h-event is returned, as `{'type': ...}`.
    * otherwise the page is interpreted as a feed (when `as-feed` is the
      string 'true' or an h-feed is found) or as a single item.

    Any exception raised while parsing or interpreting is logged and reported
    as `{'error': <exception class>}`. When `url` is missing, a string
    literal is returned instead (its content is cut off in this snippet).
    """
    def dates_to_string(json):
        # Recursively walk dicts and lists, replacing date/datetime values
        # with their ISO 8601 string form; every other value is returned
        # unchanged.
        if isinstance(json, dict):
            return {k: dates_to_string(v) for (k, v) in json.items()}
        if isinstance(json, list):
            return [dates_to_string(v) for v in json]
        if isinstance(json, datetime.date) or isinstance(json, datetime.datetime):
            return json.isoformat()
        return json

    url = request.args.get('url')
    as_feed = request.args.get('as-feed')
    op = request.args.get('op')
    if url:
        try:
            d = mf2py.parse(url=url)
            if op == 'post-type-discovery':
                # Short-circuit: the caller only wants the implied post type.
                entry = mf2util.find_first_entry(d, ['h-entry', 'h-event'])
                return jsonify({'type': mf2util.post_type_discovery(entry)})
                
            # NOTE: the local name `json` below shadows any module of the
            # same name for the remainder of this function.
            if as_feed == 'true' or mf2util.find_first_entry(d, ['h-feed']):
                json = mf2util.interpret_feed(d, url)
            else:
                json = mf2util.interpret(d, url)
            return jsonify(dates_to_string(json))
        # NOTE(review): a bare `except:` also catches SystemExit and
        # KeyboardInterrupt; the response carries only the exception class
        # (sys.exc_info()[0]), not its message.
        except:
            current_app.logger.exception('running mf2util service')
            return jsonify({'error': str(sys.exc_info()[0])})

    # No `url` argument: fall back to a literal response body (truncated here).
    return """
Ejemplo n.º 2
0
def micropub2pelican(post, settings=None):
    """Turn a single Micropub post into Pelican HTML plus metadata.

    Args:
        post: microformats2 item for the post; it is passed to
            mf2util.post_type_discovery() and wrapped as the only item of a
            parsed document for mf2util.interpret_entry().
        settings: optional settings mapping forwarded to get_html() and
            get_metadata(). Defaults to a fresh empty dict on every call, so
            nothing a helper stores in it leaks into later calls.

    Returns:
        A 2-tuple ``(html, metadata)`` as produced by get_html() and
        get_metadata().

    Raises:
        Exception: if the discovered post type is not in
            ``supported_post_types`` or the entry cannot be interpreted.
    """
    # A `{}` default would be created once and shared by every call.
    if settings is None:
        settings = {}

    post_type = mf2util.post_type_discovery(post)
    if post_type not in supported_post_types:
        raise Exception(f'{post_type} not among supported post types')

    entry = mf2util.interpret_entry({'items': [post]}, '')
    if not entry:
        raise Exception('Could not interpret parsed entry')

    return get_html(settings, post, post_type), \
        get_metadata(settings, entry, post, post_type)
Ejemplo n.º 3
0
def test_post_type_discovery():
    """Check mf2util.post_type_discovery() against a set of JSON fixtures.

    Each case names a fixture under ``tests/`` (without the ``.json``
    extension) and the post type expected for the first h-entry/h-event
    found in it.
    """
    for test, implied_type in [
            ('interpret/hwc-event', 'event'),
            ('interpret/reply_h-cite', 'reply'),
            ('interpret/reply_u-in-reply-to', 'reply'),
            ('interpret/reply_rsvp', 'rsvp'),
            ('interpret/note_with_comment_and_like', 'note'),
            ('interpret/article_naive_datetime', 'article'),
            ('posttype/tantek_photo', 'photo'),
            ('posttype/only_html_content', 'note'),
            # TODO add more tests
    ]:
        # Close the fixture deterministically instead of leaking the handle.
        with open('tests/' + test + '.json') as fixture:
            parsed = json.load(fixture)
        entry = mf2util.find_first_entry(parsed, ['h-entry', 'h-event'])
        assert implied_type == mf2util.post_type_discovery(entry)
Ejemplo n.º 4
0
def test_post_type_discovery():
    """Check mf2util.post_type_discovery() against a set of JSON fixtures.

    Each case names a fixture under ``tests/`` (without the ``.json``
    extension) and the post type expected for the first h-entry/h-event
    found in it.
    """
    for test, implied_type in [
        ('interpret/hwc-event', 'event'),
        ('interpret/reply_h-cite', 'reply'),
        ('interpret/reply_u-in-reply-to', 'reply'),
        ('interpret/reply_rsvp', 'rsvp'),
        ('interpret/note_with_comment_and_like', 'note'),
        ('interpret/article_naive_datetime', 'article'),
        ('interpret/follow', 'follow'),
        ('posttype/tantek_photo', 'photo'),
        ('posttype/only_html_content', 'note'),
        # TODO add more tests
    ]:
        # Close the fixture deterministically instead of leaking the handle.
        with open('tests/' + test + '.json') as fixture:
            parsed = json.load(fixture)
        entry = mf2util.find_first_entry(parsed, ['h-entry', 'h-event'])
        assert implied_type == mf2util.post_type_discovery(entry)
Ejemplo n.º 5
0
    def interpret(self):
        """Fill in `_type` and `_props` from the raw mf2 entry, at most once.

        Does nothing when either attribute has already been set. Otherwise
        the post type and the interpreted properties are computed with
        mf2util and then adjusted by the instance's own fix-up helpers.
        """
        nothing_cached = self._type is None and self._props is None
        if not nothing_cached:
            return

        import mf2util

        raw_entry = self._mf_entry
        self._type = mf2util.post_type_discovery(raw_entry)

        feed = self._owner_feed
        self._props = mf2util.interpret_entry(
            feed._mf_dict, feed.url, hentry=self._mf_entry)

        # Adds a `is_micropost` property.
        self._detect_micropost()

        # For a "photo"-type post mf2util only reports the first photo even
        # though there may be several, and it apparently doesn't always
        # carry the "category" info over, so patch both properties.
        self._fix_interpreted_props('photo', 'category')
Ejemplo n.º 6
0
def json_to_object(mf2):
    """Converts microformats2 JSON to an ActivityStreams object.

    Nested mf2 items (author, location, comments, non-string categories and
    activity targets) are converted by recursive calls.

    Args:
      mf2: dict, decoded JSON microformats2 object

    Returns:
      dict, ActivityStreams object, passed through util.trim_nulls(). {} if
      mf2 is empty or not a dict.
    """
    if not mf2 or not isinstance(mf2, dict):
        return {}

    props = mf2.get('properties', {})
    prop = first_props(props)
    rsvp = prop.get('rsvp')
    rsvp_verb = 'rsvp-%s' % rsvp if rsvp else None
    author = json_to_object(prop.get('author'))

    # maps mf2 type to ActivityStreams objectType and optional verb.
    mf2_type_to_as_type = {
        'rsvp': ('activity', rsvp_verb),
        'invite': ('activity', 'invite'),
        'repost': ('activity', 'share'),
        'like': ('activity', 'like'),
        'reply': ('comment', None),
        'person': ('person', None),
        'location': ('place', None),
        'note': ('note', None),
        'photo': ('note', None),
        'article': ('article', None),
    }

    mf2_types = mf2.get('type') or []
    if 'h-geo' in mf2_types or 'p-location' in mf2_types:
        mf2_type = 'location'
    else:
        mf2_type = mf2util.post_type_discovery(mf2)
    as_type, as_verb = mf2_type_to_as_type.get(mf2_type, (None, None))

    def absolute_urls(name):
        """Returns [{'url': ...}, ...] for the absolute URLs of property `name`."""
        return [{'url': url} for url in get_string_urls(props.get(name, []))
                # filter out relative and invalid URLs (mf2py gives absolute urls)
                if urlparse.urlparse(url).netloc]

    urls = props.get('url') and get_string_urls(props.get('url'))

    obj = {
        'id': prop.get('uid'),
        'objectType': as_type,
        'verb': as_verb,
        'published': prop.get('published', ''),
        'updated': prop.get('updated', ''),
        'displayName': get_text(prop.get('name')),
        'summary': get_text(prop.get('summary')),
        'content': get_html(prop.get('content')),
        'url': urls[0] if urls else None,
        # only emitted when there is more than one URL
        'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
        'image': absolute_urls('photo'),
        'stream': absolute_urls('video'),
        'location': json_to_object(prop.get('location')),
        'replies': {
            'items': [json_to_object(c) for c in props.get('comment', [])],
        },
        # plain-string categories become hashtags; nested items are converted
        'tags': [{'objectType': 'hashtag', 'displayName': cat}
                 if isinstance(cat, basestring) else json_to_object(cat)
                 for cat in props.get('category', [])],
    }

    lat, lng = prop.get('latitude'), prop.get('longitude')
    if lat and lng:
        try:
            obj['latitude'], obj['longitude'] = float(lat), float(lng)
            # TODO fill in 'position', maybe using Source.postprocess_object?
        except ValueError:
            # logging.warn is a deprecated alias (removed in Python 3.13)
            logging.warning(
                'Could not convert latitude/longitude (%s, %s) to decimal',
                lat, lng)

    if as_type == 'activity':
        objects = []
        for target in itertools.chain.from_iterable(
                props.get(field, [])
                for field in ('like', 'like-of', 'repost', 'repost-of',
                              'in-reply-to', 'invitee')):
            t = (json_to_object(target) if isinstance(target, dict)
                 else {'url': target})
            # eliminate duplicates from redundant backcompat properties
            if t not in objects:
                objects.append(t)
        obj.update({
            # a single target is unwrapped; zero or several stay a list
            'object': objects[0] if len(objects) == 1 else objects,
            'actor': author,
        })
    else:
        obj.update({
            'inReplyTo': [{'url': url} for url in
                          get_string_urls(props.get('in-reply-to', []))],
            'author': author,
        })

    return util.trim_nulls(obj)
Ejemplo n.º 7
0
def json_to_object(mf2, actor=None):
  """Converts microformats2 JSON to an ActivityStreams object.

  Nested mf2 items (author, location, comments, non-string categories and
  activity targets) are converted by recursive calls.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.

  Returns:
    dict, ActivityStreams object, passed through util.trim_nulls(). {} if mf2
    is empty or not a dict.
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  # shallow copy, so setdefault() below never adds a key to the caller's dict
  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')
  rsvp_verb = 'rsvp-%s' % rsvp if rsvp else None
  author = json_to_object(prop['author']) if prop.get('author') else actor

  # maps mf2 type to ActivityStreams objectType and optional verb.
  mf2_type_to_as_type = {
    'rsvp': ('activity', rsvp_verb),
    'invite': ('activity', 'invite'),
    'repost': ('activity', 'share'),
    'like': ('activity', 'like'),
    'reply': ('comment', None),
    'person': ('person', None),
    'location': ('place', None),
    'note': ('note', None),
    'article': ('article', None),
  }

  mf2_types = mf2.get('type') or []
  if 'h-geo' in mf2_types or 'p-location' in mf2_types:
    mf2_type = 'location'
  else:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    without_photo = copy.deepcopy(mf2)
    without_photo.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(without_photo)

  as_type, as_verb = mf2_type_to_as_type.get(mf2_type, (None, None))

  def absolute_urls(name):
    """Returns [{'url': ...}, ...] for the absolute URLs of property `name`."""
    return [{'url': url} for url in get_string_urls(props.get(name, []))
            # filter out relative and invalid URLs (mf2py gives absolute urls)
            if urlparse.urlparse(url).netloc]

  urls = props.get('url') and get_string_urls(props.get('url'))

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'displayName': get_text(prop.get('name')),
    'summary': get_text(prop.get('summary')),
    'content': get_html(prop.get('content')),
    'url': urls[0] if urls else None,
    # only emitted when there is more than one URL
    'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
    'image': absolute_urls('photo'),
    'stream': absolute_urls('video'),
    'location': json_to_object(prop.get('location')),
    'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
    # plain-string categories become hashtags; nested items are converted
    'tags': [{'objectType': 'hashtag', 'displayName': cat}
             if isinstance(cat, basestring)
             else json_to_object(cat)
             for cat in props.get('category', [])],
  }

  # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  if interpreted:
    loc = interpreted.get('location')
    if loc:
      obj['location']['objectType'] = 'place'
      lat, lng = loc.get('latitude'), loc.get('longitude')
      if lat and lng:
        try:
          obj['location']['latitude'] = float(lat)
          obj['location']['longitude'] = float(lng)
          # TODO fill in 'position', maybe using Source.postprocess_object?
        except ValueError:
          # logging.warn is a deprecated alias (removed in Python 3.13)
          logging.warning(
            'Could not convert latitude/longitude (%s, %s) to decimal', lat, lng)

  if as_type == 'activity':
    objects = []
    for target in itertools.chain.from_iterable(
        props.get(field, []) for field in (
          'like', 'like-of', 'repost', 'repost-of', 'in-reply-to', 'invitee')):
      t = json_to_object(target) if isinstance(target, dict) else {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if t not in objects:
        objects.append(t)
    obj.update({
      # a single target is unwrapped; zero or several stay a list
      'object': objects[0] if len(objects) == 1 else objects,
      'actor': author,
    })
  else:
    obj.update({
      'inReplyTo': [{'url': url}
                    for url in get_string_urls(props.get('in-reply-to', []))],
      'author': author,
    })

  return util.trim_nulls(obj)
Ejemplo n.º 8
0
def json_to_object(mf2, actor=None, fetch_mf2=False):
    """Converts microformats2 JSON to an ActivityStreams object.

    Nested mf2 items (author, location, comments, quotations, non-string
    categories and activity targets) are converted by recursive calls.

    Args:
      mf2: dict, decoded JSON microformats2 object
      actor: optional author AS actor object. usually comes from a rel="author"
        link. if mf2 has its own author, that will override this.
      fetch_mf2: boolean, whether to fetch additional pages via HTTP if necessary,
        e.g. to determine authorship: https://indieweb.org/authorship

    Returns:
      dict, ActivityStreams object, passed through
      source.Source.postprocess_object(). {} if mf2 is empty or not a dict.
    """
    if not mf2 or not isinstance(mf2, dict):
        return {}

    # shallow copy, so setdefault() below never adds a key to the caller's dict
    mf2 = copy.copy(mf2)
    props = mf2.setdefault('properties', {})
    prop = first_props(props)
    rsvp = prop.get('rsvp')

    # convert author
    mf2_author = prop.get('author')
    if mf2_author and isinstance(mf2_author, dict):
        author = json_to_object(mf2_author)
    else:
        # the author h-card may be on another page. run full authorship algorithm:
        # https://indieweb.org/authorship
        def fetch(url):
            return mf2py.parse(util.requests_get(url).text, url=url)

        author = mf2util.find_author(
            {'items': [mf2]},
            hentry=mf2,
            fetch_mf2_func=fetch if fetch_mf2 else None)
        if author:
            author = {
                'objectType': 'person',
                'url': author.get('url'),
                'displayName': author.get('name'),
                'image': [{'url': author.get('photo')}],
            }

    if not author:
        author = actor

    mf2_types = mf2.get('type') or []
    if 'h-geo' in mf2_types or 'p-location' in mf2_types:
        mf2_type = 'location'
    else:
        # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
        # *is* a photo. so, special case photo type to fall through to underlying
        # mf2 type without photo.
        # https://github.com/snarfed/bridgy/issues/702
        without_photo = copy.deepcopy(mf2)
        without_photo.get('properties', {}).pop('photo', None)
        mf2_type = mf2util.post_type_discovery(without_photo)

    as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
    if rsvp:
        as_verb = 'rsvp-%s' % rsvp

    # special case GitHub issues that are in-reply-to the repo or its issues URL
    in_reply_tos = get_string_urls(props.get('in-reply-to', []))
    for url in in_reply_tos:
        if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
            as_type = 'issue'

    def absolute_urls(name):
        """Returns the absolute URL strings of property `name`."""
        return [
            url for url in get_string_urls(props.get(name, []))
            # filter out relative and invalid URLs (mf2py gives absolute urls)
            if urllib.parse.urlparse(url).netloc
        ]

    urls = props.get('url') and get_string_urls(props.get('url'))

    # quotations: https://indieweb.org/quotation#How_to_markup
    attachments = [
        json_to_object(quote)
        for quote in mf2.get('children', []) + props.get('quotation-of', [])
        if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))
    ]

    # audio and video. (loop variable deliberately not named `type`, which
    # would shadow the builtin.)
    for media_type in 'audio', 'video':
        attachments.extend(
            {'objectType': media_type, 'stream': {'url': url}}
            for url in get_string_urls(props.get(media_type, [])))

    obj = {
        'id': prop.get('uid'),
        'objectType': as_type,
        'verb': as_verb,
        'published': prop.get('published', ''),
        'updated': prop.get('updated', ''),
        'startTime': prop.get('start'),
        'endTime': prop.get('end'),
        'displayName': get_text(prop.get('name')),
        'summary': get_text(prop.get('summary')),
        'content': get_html(prop.get('content')),
        'url': urls[0] if urls else None,
        # only emitted when there is more than one URL
        'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
        'image': [{'url': url} for url in dedupe_urls(
            absolute_urls('photo') + absolute_urls('featured'))],
        'stream': [{'url': url} for url in absolute_urls('video')],
        'location': json_to_object(prop.get('location')),
        'replies': {
            'items': [json_to_object(c) for c in props.get('comment', [])],
        },
        # plain-string categories become hashtags; nested items are converted
        'tags': [{'objectType': 'hashtag', 'displayName': cat}
                 if isinstance(cat, basestring) else json_to_object(cat)
                 for cat in props.get('category', [])],
        'attachments': attachments,
    }

    # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
    interpreted = mf2util.interpret({'items': [mf2]}, None)
    if interpreted:
        loc = interpreted.get('location')
        if loc:
            obj['location']['objectType'] = 'place'
            lat, lng = loc.get('latitude'), loc.get('longitude')
            if lat and lng:
                try:
                    obj['location'].update({
                        'latitude': float(lat),
                        'longitude': float(lng),
                    })
                except ValueError:
                    # logging.warn is a deprecated alias (removed in Python 3.13)
                    logging.warning(
                        'Could not convert latitude/longitude (%s, %s) to decimal',
                        lat, lng)

    if as_type == 'activity':
        objects = []
        for target in itertools.chain.from_iterable(
                props.get(field, [])
                for field in ('like', 'like-of', 'repost', 'repost-of',
                              'in-reply-to', 'invitee')):
            t = (json_to_object(target) if isinstance(target, dict)
                 else {'url': target})
            # eliminate duplicates from redundant backcompat properties
            if t not in objects:
                objects.append(t)
        obj.update({
            # a single target is unwrapped; zero or several stay a list
            'object': objects[0] if len(objects) == 1 else objects,
            'actor': author,
        })
    else:
        obj.update({
            'inReplyTo': [{'url': url} for url in in_reply_tos],
            'author': author,
        })

    return source.Source.postprocess_object(obj)
Ejemplo n.º 9
0
def json_to_object(mf2, actor=None, fetch_mf2=False):
  """Converts a single microformats2 JSON item to an ActivityStreams object.

  Supports h-entry, h-event, h-card, and other single item types. Does *not* yet
  support h-feed.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.
    fetch_mf2: boolean, whether to fetch additional pages via HTTP if necessary,
      e.g. to determine authorship: https://indieweb.org/authorship

  Returns:
    dict, ActivityStreams object, passed through
    source.Source.postprocess_object(). {} if mf2 is empty or not a dict.

  Raises:
    NotImplementedError: if the item is a tag (has tag-of) and also has
      activity targets such as in-reply-to.
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  # shallow copy, so setdefault() below never adds a key to the caller's dict
  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')

  # convert author
  mf2_author = prop.get('author')
  if mf2_author and isinstance(mf2_author, dict):
    author = json_to_object(mf2_author)
  else:
    # the author h-card may be on another page. run full authorship algorithm:
    # https://indieweb.org/authorship
    def fetch(url):
      return mf2py.parse(util.requests_get(url).text, url=url, img_with_alt=True)
    author = mf2util.find_author(
      {'items': [mf2]}, hentry=mf2, fetch_mf2_func=fetch if fetch_mf2 else None)
    if author:
      author = {
        'objectType': 'person',
        'url': author.get('url'),
        'displayName': author.get('name'),
        'image': [{'url': author.get('photo')}],
      }

  if not author:
    author = actor

  mf2_types = mf2.get('type') or []
  if 'h-geo' in mf2_types or 'p-location' in mf2_types:
    mf2_type = 'location'
  elif 'tag-of' in props:
    # TODO: remove once this is in mf2util
    # https://github.com/kylewm/mf2util/issues/18
    mf2_type = 'tag'
  elif 'follow-of' in props: # ditto
    mf2_type = 'follow'
  else:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    without_photo = copy.deepcopy(mf2)
    without_photo.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(without_photo)

  as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
  if rsvp:
    as_verb = 'rsvp-%s' % rsvp

  # special case GitHub issues that are in-reply-to the repo or its issues URL
  in_reply_tos = get_string_urls(props.get('in-reply-to', []))
  for url in in_reply_tos:
    if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
      as_type = 'issue'

  def is_absolute(url):
    """Filter out relative and invalid URLs (mf2py gives absolute urls)."""
    return urllib.parse.urlparse(url).netloc

  urls = props.get('url') and get_string_urls(props.get('url'))

  # quotations: https://indieweb.org/quotation#How_to_markup
  attachments = [
    json_to_object(quote)
    for quote in mf2.get('children', []) + props.get('quotation-of', [])
    if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

  # audio and video. (loop variable deliberately not named `type`, which would
  # shadow the builtin.)
  for media_type in 'audio', 'video':
    attachments.extend({'objectType': media_type, 'stream': {'url': url}}
                       for url in get_string_urls(props.get(media_type, [])))

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'startTime': prop.get('start'),
    'endTime': prop.get('end'),
    'displayName': get_text(prop.get('name')),
    'summary': get_text(prop.get('summary')),
    'content': get_html(prop.get('content')),
    'url': urls[0] if urls else None,
    # only emitted when there is more than one URL
    'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
    # image is special cased below, to handle alt
    'stream': [{'url': url} for url in get_string_urls(props.get('video', []))],
    'location': json_to_object(prop.get('location')),
    'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
    # plain-string categories become hashtags; nested items are converted
    'tags': [{'objectType': 'hashtag', 'displayName': cat}
             if isinstance(cat, basestring)
             else json_to_object(cat)
             for cat in props.get('category', [])],
    'attachments': attachments,
  }

  # images, including alt text
  photo_urls = set()
  obj['image'] = []
  for photo in props.get('photo', []) + props.get('featured', []):
    url = photo
    alt = None
    if isinstance(photo, dict):
      # a dict photo is either a nested item (with 'properties') or a plain
      # dict carrying value/url/alt itself
      photo = photo.get('properties') or photo
      url = get_first(photo, 'value') or get_first(photo, 'url')
      alt = get_first(photo, 'alt')
    # skip missing, duplicate and relative URLs
    if url and url not in photo_urls and is_absolute(url):
      photo_urls.add(url)
      obj['image'].append({'url': url, 'displayName': alt})

  # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  if interpreted:
    loc = interpreted.get('location')
    if loc:
      obj['location']['objectType'] = 'place'
      lat, lng = loc.get('latitude'), loc.get('longitude')
      if lat and lng:
        try:
          obj['location'].update({
            'latitude': float(lat),
            'longitude': float(lng),
          })
        except ValueError:
          # logging.warn is a deprecated alias (removed in Python 3.13)
          logging.warning(
            'Could not convert latitude/longitude (%s, %s) to decimal', lat, lng)

  if as_type == 'activity':
    objects = []
    for target in itertools.chain.from_iterable(
        props.get(field, []) for field in (
          'follow-of', 'like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
          'invitee')):
      t = json_to_object(target) if isinstance(target, dict) else {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if t not in objects:
        objects.append(t)
    obj.update({
      # a single target is unwrapped; zero or several stay a list
      'object': objects[0] if len(objects) == 1 else objects,
      'actor': author,
    })
    if as_verb == 'tag':
      # for a tag activity, the tagged post is the target and the tags
      # themselves become the object
      obj['target'] = {'url': prop['tag-of']}
      if obj.get('object'):
        raise NotImplementedError(
          'Combined in-reply-to and tag-of is not yet supported.')
      obj['object'] = obj.pop('tags')
  else:
    obj.update({
      'inReplyTo': [{'url': url} for url in in_reply_tos],
      'author': author,
    })

  return source.Source.postprocess_object(obj)
Ejemplo n.º 10
0
def json_to_object(mf2, actor=None):
    """Converts microformats2 JSON to an ActivityStreams object.

    Nested mf2 items (author, location, comments, non-string categories and
    activity targets) are converted by recursive calls.

    Args:
      mf2: dict, decoded JSON microformats2 object
      actor: optional author AS actor object. usually comes from a rel="author"
        link. if mf2 has its own author, that will override this.

    Returns:
      dict, ActivityStreams object, passed through util.trim_nulls(). {} if
      mf2 is empty or not a dict.
    """
    if not mf2 or not isinstance(mf2, dict):
        return {}

    # shallow copy, so setdefault() below never adds a key to the caller's dict
    mf2 = copy.copy(mf2)
    props = mf2.setdefault('properties', {})
    prop = first_props(props)
    rsvp = prop.get('rsvp')
    rsvp_verb = 'rsvp-%s' % rsvp if rsvp else None
    author = json_to_object(prop['author']) if prop.get('author') else actor

    # maps mf2 type to ActivityStreams objectType and optional verb.
    mf2_type_to_as_type = {
        'rsvp': ('activity', rsvp_verb),
        'invite': ('activity', 'invite'),
        'repost': ('activity', 'share'),
        'like': ('activity', 'like'),
        'reply': ('comment', None),
        'person': ('person', None),
        'location': ('place', None),
        'note': ('note', None),
        'article': ('article', None),
    }

    mf2_types = mf2.get('type') or []
    if 'h-geo' in mf2_types or 'p-location' in mf2_types:
        mf2_type = 'location'
    else:
        # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
        # *is* a photo. so, special case photo type to fall through to underlying
        # mf2 type without photo.
        # https://github.com/snarfed/bridgy/issues/702
        without_photo = copy.deepcopy(mf2)
        without_photo.get('properties', {}).pop('photo', None)
        mf2_type = mf2util.post_type_discovery(without_photo)

    as_type, as_verb = mf2_type_to_as_type.get(mf2_type, (None, None))

    def absolute_urls(name):
        """Returns [{'url': ...}, ...] for the absolute URLs of property `name`."""
        return [{'url': url} for url in get_string_urls(props.get(name, []))
                # filter out relative and invalid URLs (mf2py gives absolute urls)
                if urlparse.urlparse(url).netloc]

    urls = props.get('url') and get_string_urls(props.get('url'))

    obj = {
        'id': prop.get('uid'),
        'objectType': as_type,
        'verb': as_verb,
        'published': prop.get('published', ''),
        'updated': prop.get('updated', ''),
        'displayName': get_text(prop.get('name')),
        'summary': get_text(prop.get('summary')),
        'content': get_html(prop.get('content')),
        'url': urls[0] if urls else None,
        # only emitted when there is more than one URL
        'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
        'image': absolute_urls('photo'),
        'stream': absolute_urls('video'),
        'location': json_to_object(prop.get('location')),
        'replies': {
            'items': [json_to_object(c) for c in props.get('comment', [])],
        },
        # plain-string categories become hashtags; nested items are converted
        'tags': [{'objectType': 'hashtag', 'displayName': cat}
                 if isinstance(cat, basestring) else json_to_object(cat)
                 for cat in props.get('category', [])],
    }

    # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
    interpreted = mf2util.interpret({'items': [mf2]}, None)
    if interpreted:
        loc = interpreted.get('location')
        if loc:
            obj['location']['objectType'] = 'place'
            lat, lng = loc.get('latitude'), loc.get('longitude')
            if lat and lng:
                try:
                    obj['location']['latitude'] = float(lat)
                    obj['location']['longitude'] = float(lng)
                    # TODO fill in 'position', maybe using Source.postprocess_object?
                except ValueError:
                    # logging.warn is a deprecated alias (removed in Python 3.13)
                    logging.warning(
                        'Could not convert latitude/longitude (%s, %s) to decimal',
                        lat, lng)

    if as_type == 'activity':
        objects = []
        for target in itertools.chain.from_iterable(
                props.get(field, [])
                for field in ('like', 'like-of', 'repost', 'repost-of',
                              'in-reply-to', 'invitee')):
            t = (json_to_object(target) if isinstance(target, dict)
                 else {'url': target})
            # eliminate duplicates from redundant backcompat properties
            if t not in objects:
                objects.append(t)
        obj.update({
            # a single target is unwrapped; zero or several stay a list
            'object': objects[0] if len(objects) == 1 else objects,
            'actor': author,
        })
    else:
        obj.update({
            'inReplyTo': [{'url': url} for url in
                          get_string_urls(props.get('in-reply-to', []))],
            'author': author,
        })

    return util.trim_nulls(obj)
Ejemplo n.º 11
0
def fetch_post_type(parsed):
    """Return the post type of the first h-entry in `parsed`.

    Falls back to 'note' when mf2util finds no h-entry.
    """
    first_entry = mf2util.find_first_entry(parsed, ['h-entry'])
    if not first_entry:
        return 'note'
    return mf2util.post_type_discovery(first_entry)
Ejemplo n.º 12
0
def json_to_object(mf2):
  """Builds an ActivityStreams object out of a microformats2 JSON item.

  Explicit h-as-* (and h-card) classes take priority over implicit post type
  discovery when choosing the ActivityStreams objectType and verb.

  Args:
    mf2: dict, decoded JSON microformats2 object

  Returns: dict, ActivityStreams object
  """
  if not (mf2 and isinstance(mf2, dict)):
    return {}

  props = mf2.get('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')
  if rsvp:
    rsvp_verb = 'rsvp-%s' % rsvp
  else:
    rsvp_verb = None
  author = json_to_object(prop.get('author'))

  # (mf2 class, mf2 type) pairs in priority order. an explicit class listed
  # here wins over whatever post type discovery would infer.
  h_class_overrides = (
    ('h-as-rsvp', 'rsvp'),
    ('h-as-share', 'repost'),
    ('h-as-like', 'like'),
    ('h-as-comment', 'reply'),
    ('h-as-reply', 'reply'),
    ('h-as-article', 'article'),
    ('h-as-note', 'note'),
    ('h-as-location', 'location'),
    ('h-card', 'person'),
  )

  # mf2 type => (ActivityStreams objectType, optional verb)
  mf2_type_to_as_type = {
    'rsvp': ('activity', rsvp_verb),
    'invite': ('activity', 'invite'),
    'repost': ('activity', 'share'),
    'like': ('activity', 'like'),
    'reply': ('comment', None),
    'person': ('person', None),
    'location': ('place', None),
    'note': ('note', None),
    'photo': ('note', None),
    'article': ('article', None),
  }

  item_classes = mf2.get('type', [])
  mf2_type = next(
    (kind for h_class, kind in h_class_overrides if h_class in item_classes),
    None)
  if mf2_type is None:
    # no explicit override class; fall back to implicit discovery
    mf2_type = mf2util.post_type_discovery(mf2)

  as_type, as_verb = mf2_type_to_as_type.get(mf2_type, (None, None))

  photos = []
  for photo_url in get_string_urls(props.get('photo', [])):
    # drop relative and invalid URLs (mf2py gives absolute urls)
    if urlparse.urlparse(photo_url).netloc:
      photos.append(photo_url)

  urls = props.get('url') and get_string_urls(props.get('url'))

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'displayName': get_text(prop.get('name')),
    'summary': get_text(prop.get('summary')),
    'content': get_html(prop.get('content')),
    'url': urls[0] if urls else None,
    'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
    'image': {'url': photos[0] if photos else None},
    'location': json_to_object(prop.get('location')),
    'replies': {
      'items': [json_to_object(comment) for comment in props.get('comment', [])],
    },
    'tags': [json_to_object(category) for category in props.get('category', [])],
  }

  if as_type == 'activity':
    target_fields = (
      'like', 'like-of', 'repost', 'repost-of', 'in-reply-to', 'invitee')
    targets = itertools.chain.from_iterable(
      props.get(field, []) for field in target_fields)

    objects = []
    for target in targets:
      if isinstance(target, dict):
        converted = json_to_object(target)
      else:
        converted = {'url': target}
      # the backcompat properties are redundant, so skip repeats
      if converted not in objects:
        objects.append(converted)

    extra = {
      'object': objects[0] if len(objects) == 1 else objects,
      'actor': author,
    }
  else:
    reply_urls = get_string_urls(props.get('in-reply-to', []))
    extra = {
      'inReplyTo': [{'url': url} for url in reply_urls],
      'author': author,
    }

  obj.update(extra)
  return util.trim_nulls(obj)
Ejemplo n.º 13
0
def json_to_object(mf2, actor=None, fetch_mf2=False):
  """Converts a single microformats2 JSON item to an ActivityStreams object.

  Supports h-entry, h-event, h-card, and other single item types. Does *not* yet
  support h-feed.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.
    fetch_mf2: boolean, whether to fetch additional pages via HTTP if necessary,
      e.g. to determine authorship: https://indieweb.org/authorship

  Returns:
    dict, ActivityStreams object. Empty dict if mf2 is falsy or not a dict.

  Raises:
    NotImplementedError: if the resulting verb is 'tag' and the item also has
      activity targets (e.g. in-reply-to, like-of, repost-of).
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  # shallow copy so that adding a default 'properties' key below doesn't add a
  # key to the caller's dict
  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')

  # convert author
  mf2_author = prop.get('author')
  if mf2_author and isinstance(mf2_author, dict):
    author = json_to_object(mf2_author)
  else:
    # the author h-card may be on another page. run full authorship algorithm:
    # https://indieweb.org/authorship
    author = mf2util.find_author({'items': [mf2]}, hentry=mf2,
                                 fetch_mf2_func=util.fetch_mf2 if fetch_mf2 else None)
    if author:
      author = {
        'objectType': 'person',
        'url': author.get('url'),
        'displayName': author.get('name'),
        'image': [{'url': author.get('photo')}],
      }

  # fall back to the caller-provided actor only if mf2 had no author of its own
  if not author:
    author = actor

  mf2_types = mf2.get('type') or []
  if 'h-geo' in mf2_types or 'p-location' in mf2_types:
    mf2_type = 'location'
  elif 'tag-of' in props:
    # TODO: remove once this is in mf2util
    # https://github.com/kylewm/mf2util/issues/18
    mf2_type = 'tag'
  elif 'follow-of' in props: # ditto
    mf2_type = 'follow'
  else:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    without_photo = copy.deepcopy(mf2)
    without_photo.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(without_photo)

  as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
  if rsvp:
    as_verb = 'rsvp-%s' % rsvp

  # special case GitHub issues that are in-reply-to the repo or its issues URL.
  # the dot in the hostname is escaped so that only github.com itself matches.
  in_reply_tos = get_string_urls(props.get('in-reply-to', []))
  if any(re.match(r'^https?://github\.com/[^/]+/[^/]+(/issues)?/?$', reply_url)
         for reply_url in in_reply_tos):
    as_type = 'issue'

  def is_absolute(url):
    """Filter out relative and invalid URLs (mf2py gives absolute urls)."""
    return urllib.parse.urlparse(url).netloc

  urls = props.get('url') and get_string_urls(props.get('url'))

  # quotations: https://indieweb.org/quotation#How_to_markup
  attachments = [
    json_to_object(quote)
    for quote in mf2.get('children', []) + props.get('quotation-of', [])
    if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

  # audio and video
  #
  # the duration mf2 property is still emerging. examples in the wild use both
  # integer seconds and ISO 8601 durations.
  # https://indieweb.org/duration
  # https://en.wikipedia.org/wiki/ISO_8601#Durations
  duration = prop.get('duration') or prop.get('length')
  if duration:
    if util.is_int(duration):
      duration = int(duration)
    else:
      parsed = util.parse_iso8601_duration(duration)
      if parsed:
        duration = int(parsed.total_seconds())
      else:
        logging.debug('Unknown format for length or duration %r', duration)
        duration = None

  stream = None
  size_bytes = size_to_bytes(prop.get('size'))
  # video is processed last, so when both audio and video are present, the
  # first video's stream ends up as the top-level stream.
  for media_type in 'audio', 'video':
    atts = [{
      'objectType': media_type,
      'stream': {
        'url': url,
        # integer seconds: http://activitystrea.ms/specs/json/1.0/#media-link
        'duration': duration,
        # file size in bytes. nonstandard, not in AS1 or AS2
        'size': size_bytes,
      },
    } for url in get_string_urls(props.get(media_type, []))]
    attachments.extend(atts)
    if atts:
      stream = atts[0]['stream']

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'startTime': prop.get('start'),
    'endTime': prop.get('end'),
    'displayName': get_text(prop.get('name')),
    'summary': get_text(prop.get('summary')),
    'content': get_html(prop.get('content')),
    'url': urls[0] if urls else None,
    'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
    # image is special cased below, to handle alt
    'stream': [stream],
    'location': json_to_object(prop.get('location')),
    'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
    'tags': [{'objectType': 'hashtag', 'displayName': cat}
             if isinstance(cat, str)
             else json_to_object(cat)
             for cat in props.get('category', [])],
    'attachments': attachments,
  }

  # images, including alt text. photo_urls de-dupes URLs that appear in both
  # photo and featured (or more than once in either).
  photo_urls = set()
  obj['image'] = []
  for photo in props.get('photo', []) + props.get('featured', []):
    url = photo
    alt = None
    if isinstance(photo, dict):
      # accept either a nested mf2 item (with 'properties') or a flat dict
      photo = photo.get('properties') or photo
      url = get_first(photo, 'value') or get_first(photo, 'url')
      alt = get_first(photo, 'alt')
    if url and url not in photo_urls and is_absolute(url):
      photo_urls.add(url)
      obj['image'].append({'url': url, 'displayName': alt})

  # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  if interpreted:
    loc = interpreted.get('location')
    if loc:
      obj['location']['objectType'] = 'place'
      lat, lng = loc.get('latitude'), loc.get('longitude')
      if lat and lng:
        try:
          obj['location'].update({
            'latitude': float(lat),
            'longitude': float(lng),
          })
        except (TypeError, ValueError):
          # ValueError: unparseable string. TypeError: value that's neither a
          # string nor a number, e.g. a nested object in malformed input.
          logging.debug(
            'Could not convert latitude/longitude (%s, %s) to decimal', lat, lng)

  if as_type == 'activity':
    objects = []
    for target in itertools.chain.from_iterable(
        props.get(field, []) for field in (
          'follow-of', 'like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
          'invitee')):
      t = json_to_object(target) if isinstance(target, dict) else {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if t not in objects:
        objects.append(t)
    obj.update({
      'object': objects[0] if len(objects) == 1 else objects,
      'actor': author,
    })
    if as_verb == 'tag':
      obj['target'] = {'url': prop['tag-of']}
      if obj.get('object'):
        raise NotImplementedError(
          'Combined in-reply-to and tag-of is not yet supported.')
      # for tag activities, the tags themselves are the activity's object
      obj['object'] = obj.pop('tags')
  else:
    obj.update({
      'inReplyTo': [{'url': url} for url in in_reply_tos],
      'author': author,
    })

  return source.Source.postprocess_object(obj)