Python find_first_entry Examples, mf2util.find_first_entry Python Examples

Example #1

0

Show file

File: services.py Project: mongolsamurai/redwind

def convert_mf2util():
    # Request handler (presumably a Flask view, given request/jsonify/
    # current_app): parses the page named by the ``url`` query parameter with
    # mf2py and returns mf2util's interpretation of it as a JSON response.
    def dates_to_string(json):
        # Recursively walk dicts and lists, replacing date/datetime values
        # with their ISO-format strings; any other value is returned as-is.
        if isinstance(json, dict):
            return {k: dates_to_string(v) for (k, v) in json.items()}
        if isinstance(json, list):
            return [dates_to_string(v) for v in json]
        if isinstance(json, datetime.date) or isinstance(json, datetime.datetime):
            return json.isoformat()
        return json

    # Query parameters: page to parse, optional feed override, optional
    # operation selector.
    url = request.args.get('url')
    as_feed = request.args.get('as-feed')
    op = request.args.get('op')
    if url:
        try:
            d = mf2py.parse(url=url)
            # 'post-type-discovery' short-circuits: only the type of the
            # first h-entry/h-event is reported.
            if op == 'post-type-discovery':
                entry = mf2util.find_first_entry(d, ['h-entry', 'h-event'])
                return jsonify({'type': mf2util.post_type_discovery(entry)})
                
            # Interpret as a feed when explicitly requested or when the page
            # contains an h-feed; otherwise interpret as a single item.
            if as_feed == 'true' or mf2util.find_first_entry(d, ['h-feed']):
                json = mf2util.interpret_feed(d, url)
            else:
                json = mf2util.interpret(d, url)
            return jsonify(dates_to_string(json))
        except:
            # Any failure is logged with its traceback and reported to the
            # client as the exception class.
            current_app.logger.exception('running mf2util service')
            return jsonify({'error': str(sys.exc_info()[0])})

    # No ``url`` given: a literal string is returned instead.
    # NOTE(review): the string literal below is cut off in this listing.
    return """

Example #2

0

Show file

    def post(self):
        """Handle a webmention POST by federating its source page.

        Reads the required ``source`` parameter, fetches that page and parses
        it as microformats2, checks that the page links back to this host,
        converts the first h-entry to an ActivityStreams 1 object (stored on
        ``self.source_obj``), and then tries ActivityPub delivery, trying
        Salmon only when the ActivityPub attempt returns a falsy value.

        Sets ``self.source_url``, ``self.source_domain``, ``self.source_mf2``
        and ``self.source_obj`` along the way.
        """
        logging.info('(Params: %s )', self.request.params.items())

        # Fetch the source page; prefer the final (post-redirect) URL.
        source = util.get_required_param(self, 'source')
        fetched = common.requests_get(source)
        self.source_url = fetched.url or source
        netloc = urlparse.urlparse(self.source_url).netloc
        self.source_domain = netloc.split(':')[0]  # drop any port
        page_text = fetched.text
        self.source_mf2 = mf2py.parse(page_text, url=self.source_url,
                                      img_with_alt=True)

        # The page must link back to us, either verbatim or URL-quoted. This
        # satisfies the webmention spec and confirms the author's intent to
        # federate to mastodon.
        host_url = self.request.host_url
        has_backlink = (host_url in page_text or
                        urllib.quote(host_url, safe='') in page_text)
        if not has_backlink:
            common.error(self, "Couldn't find link to %s" % self.request.host_url)

        # Convert the source page to ActivityStreams.
        first_entry = mf2util.find_first_entry(self.source_mf2, ['h-entry'])
        if not first_entry:
            common.error(self, 'No microformats2 found on %s' % self.source_url)

        logging.info('First entry: %s', json.dumps(first_entry, indent=2))

        # The url becomes the AS2 id, which ActivityPub requires, so default
        # it to the source URL when the entry has none.
        properties = first_entry.setdefault('properties', {})
        if not properties.get('url'):
            properties['url'] = [self.source_url]

        self.source_obj = microformats2.json_to_object(first_entry, fetch_mf2=True)
        logging.info('Converted to AS1: %s', json.dumps(self.source_obj, indent=2))

        if not self.try_activitypub():
            self.try_salmon()

Example #3

0

Show file

def html_to_activities(html, url=None, actor=None, id=None):
  """Turn microformats2 HTML into a list of ActivityStreams activities.

  The items come from the children of the first h-feed when there is one,
  otherwise from the top-level items. Only h-entry, h-event and h-cite items
  are converted; each resulting object is flagged ``content_is_html``.

  Args:
    html: unicode string HTML or :class:`requests.Response`
    url: optional string URL that HTML came from
    actor: optional author AS actor object applied to every activity. usually
      comes from a rel="author" link.
    id: string, optional id of a specific element to extract and parse.
      defaults to the whole page.

  Returns:
    list of ActivityStreams activity dicts, each of the form
    ``{'object': ...}``
  """
  mf2 = util.parse_mf2(html, url=url, id=id)
  feed = mf2util.find_first_entry(mf2, ['h-feed'])
  if feed:
    candidates = feed.get('children', [])
  else:
    candidates = mf2.get('items', [])

  def is_supported(item):
    # same membership tests, in the same order, as a chained ``or``
    item_types = item.get('type', [])
    return any(t in item_types for t in ('h-entry', 'h-event', 'h-cite'))

  def to_activity(item):
    converted = json_to_object(item, actor=actor)
    converted['content_is_html'] = True
    return {'object': converted}

  return [to_activity(item) for item in candidates if is_supported(item)]

Example #4

0

Show file

def html_to_activities(html, url=None):
    """Turn a microformats2 HTML h-feed into ActivityStreams activities.

    Items are the children of the first h-feed if one exists, otherwise the
    page's top-level items.

    Args:
      html: string HTML
      url: optional string URL that HTML came from

    Returns:
      list of ActivityStreams activity dicts, each ``{'object': ...}``
    """
    mf2 = mf2py.parse(doc=html, url=url)
    feed = mf2util.find_first_entry(mf2, ['h-feed'])
    if feed:
        source_items = feed.get('children', [])
    else:
        source_items = mf2.get('items', [])

    activities = []
    for item in source_items:
        activities.append({'object': json_to_object(item)})
    return activities

Example #5

0

Show file

def test_post_type_discovery():
    """Check mf2util.post_type_discovery against stored JSON fixtures.

    Each case pairs a fixture path under ``tests/`` (without the ``.json``
    suffix) with the post type expected for the first h-entry or h-event
    found in that fixture.
    """
    for test, implied_type in [
            ('interpret/hwc-event', 'event'),
            ('interpret/reply_h-cite', 'reply'),
            ('interpret/reply_u-in-reply-to', 'reply'),
            ('interpret/reply_rsvp', 'rsvp'),
            ('interpret/note_with_comment_and_like', 'note'),
            ('interpret/article_naive_datetime', 'article'),
            ('posttype/tantek_photo', 'photo'),
            ('posttype/only_html_content', 'note'),
            # TODO add more tests
    ]:
        # Context manager closes each fixture promptly instead of leaking
        # the handle until garbage collection.
        with open('tests/' + test + '.json') as fixture:
            parsed = json.load(fixture)
        entry = mf2util.find_first_entry(parsed, ['h-entry', 'h-event'])
        assert implied_type == mf2util.post_type_discovery(entry)

Example #6

0

Show file

File: test_post_type_discovery.py Project: drivet/mf2util

def test_post_type_discovery():
    """Check mf2util.post_type_discovery against stored JSON fixtures.

    Each case pairs a fixture path under ``tests/`` (without the ``.json``
    suffix) with the post type expected for the first h-entry or h-event
    found in that fixture.
    """
    for test, implied_type in [
        ('interpret/hwc-event', 'event'),
        ('interpret/reply_h-cite', 'reply'),
        ('interpret/reply_u-in-reply-to', 'reply'),
        ('interpret/reply_rsvp', 'rsvp'),
        ('interpret/note_with_comment_and_like', 'note'),
        ('interpret/article_naive_datetime', 'article'),
        ('interpret/follow', 'follow'),
        ('posttype/tantek_photo', 'photo'),
        ('posttype/only_html_content', 'note'),
        # TODO add more tests
    ]:
        # Context manager closes each fixture promptly instead of leaking
        # the handle until garbage collection.
        with open('tests/' + test + '.json') as fixture:
            parsed = json.load(fixture)
        entry = mf2util.find_first_entry(parsed, ['h-entry', 'h-event'])
        assert implied_type == mf2util.post_type_discovery(entry)

Example #7

0

Show file

def html_to_activities(html, url=None, actor=None):
  """Turn a microformats2 HTML h-feed into ActivityStreams activities.

  Items are the children of the first h-feed if one exists, otherwise the
  page's top-level items.

  Args:
    html: string HTML
    url: optional string URL that HTML came from
    actor: optional author AS actor object applied to every activity. usually
      comes from a rel="author" link.

  Returns:
    list of ActivityStreams activity dicts, each ``{'object': ...}``
  """
  mf2 = mf2py.parse(doc=html, url=url)
  feed = mf2util.find_first_entry(mf2, ['h-feed'])
  if feed:
    source_items = feed.get('children', [])
  else:
    source_items = mf2.get('items', [])

  activities = []
  for item in source_items:
    activities.append({'object': json_to_object(item, actor=actor)})
  return activities

Example #8

0

Show file

def convert_to_as2(url):
    """Fetch an HTML page by URL and return it converted to AS2.

    The page's first h-entry is converted to AS1, then to AS2, and finally
    passed through ``common.postprocess_as2``.

    Currently mainly for Pixelfed.
    https://github.com/snarfed/bridgy-fed/issues/39

    Returns:
      a ``(obj, headers)`` tuple: the AS2 object and the response headers
      (AS2 content type plus a wildcard CORS header).
    """
    mf2 = util.fetch_mf2(url)
    entry = mf2util.find_first_entry(mf2, ['h-entry'])
    logging.info(f"Parsed mf2 for {mf2['url']}: {json_dumps(entry, indent=2)}")

    # mf2 h-entry -> AS1 -> AS2 -> post-processed AS2
    as1_object = microformats2.json_to_object(entry)
    as2_object = as2.from_as1(as1_object)
    obj = common.postprocess_as2(as2_object)
    logging.info(f'Returning: {json_dumps(obj, indent=2)}')

    response_headers = {
        'Content-Type': common.CONTENT_TYPE_AS2,
        'Access-Control-Allow-Origin': '*',
    }
    return obj, response_headers

Example #9

0

Show file

File: services.py Project: thedod/redwind

def convert_mf2util():
    # Request handler (presumably a Flask view, given request/jsonify):
    # parses the page named by the ``url`` query parameter with mf2py and
    # returns mf2util's interpretation of it as a JSON response.
    def dates_to_string(json):
        # Recursively walk dicts and lists, replacing date/datetime values
        # with their ISO-format strings; any other value is returned as-is.
        if isinstance(json, dict):
            return {k: dates_to_string(v) for (k, v) in json.items()}
        if isinstance(json, list):
            return [dates_to_string(v) for v in json]
        if isinstance(json, datetime.date) or isinstance(json, datetime.datetime):
            return json.isoformat()
        return json

    url = request.args.get('url')
    if url:
        d = mf2py.Parser(url=url).to_dict()
        # Interpret as a feed when the page contains an h-feed; otherwise
        # interpret as a single item.
        if mf2util.find_first_entry(d, ['h-feed']):
            json = mf2util.interpret_feed(d, url)
        else:
            json = mf2util.interpret(d, url)
        return jsonify(dates_to_string(json))
    # No ``url`` given: a literal string is returned instead.
    # NOTE(review): the string literal below is cut off in this listing.
    return """

Example #10

0

Show file

    def convert_to_as2(self, url):
        """Fetch an HTML page by URL and write it to the response as AS2.

        The page's first h-entry is converted to AS1, then to AS2, and
        passed through ``self.postprocess_as2``. The result is written to
        ``self.response`` as indented JSON, with an AS2 content type and a
        wildcard CORS header.

        Currently mainly for Pixelfed.
        https://github.com/snarfed/bridgy-fed/issues/39
        """
        parsed_mf2 = util.fetch_mf2(url)
        first_entry = mf2util.find_first_entry(parsed_mf2, ['h-entry'])
        logging.info('Parsed mf2 for %s: %s', parsed_mf2['url'],
                     json_dumps(first_entry, indent=2))

        # mf2 h-entry -> AS1 -> AS2 -> post-processed AS2
        as1_object = microformats2.json_to_object(first_entry)
        as2_object = self.postprocess_as2(as2.from_as1(as1_object))
        logging.info('Returning: %s', json_dumps(as2_object, indent=2))

        response_headers = {
            'Content-Type': common.CONTENT_TYPE_AS2,
            'Access-Control-Allow-Origin': '*',
        }
        self.response.headers.update(response_headers)
        self.response.write(json_dumps(as2_object, indent=2))

Example #11

0

Show file

File: webmention.py Project: snarfed/bridgy-fed

    def dispatch_request(self):
        """Handle a webmention request by federating its source page.

        Reads the required ``source`` form parameter, fetches that page and
        parses it as microformats2, checks that the page links back to this
        host, converts the first h-entry to an ActivityStreams 1 object
        (stored on ``self.source_obj``), and then tries ActivityPub followed
        by Salmon.

        Returns:
          the first truthy value returned by ``self.try_activitypub`` or
          ``self.try_salmon``, or ``''`` if both return a falsy value.
        """
        logging.info(f'Params: {list(request.form.items())}')

        # fetch source page
        source = flask_util.get_required_param('source')
        source_resp = common.requests_get(source)
        self.source_url = source_resp.url or source
        self.source_domain = urllib.parse.urlparse(
            self.source_url).netloc.split(':')[0]
        self.source_mf2 = util.parse_mf2(source_resp)

        # logging.debug(f'Parsed mf2 for {source_resp.url} : {json_dumps(self.source_mf2, indent=2)}')

        # check for backlink to bridgy fed (for webmention spec and to confirm
        # source's intent to federate to mastodon)
        if (request.host_url not in source_resp.text and urllib.parse.quote(
                request.host_url, safe='') not in source_resp.text):
            # f-string prefix is required here; without it the message
            # contains the literal text "{request.host_url}".
            error(f"Couldn't find link to {request.host_url}")

        # convert source page to ActivityStreams
        entry = mf2util.find_first_entry(self.source_mf2, ['h-entry'])
        if not entry:
            error(f'No microformats2 found on {self.source_url}')

        logging.info(f'First entry: {json_dumps(entry, indent=2)}')
        # make sure it has url, since we use that for AS2 id, which is required
        # for ActivityPub.
        props = entry.setdefault('properties', {})
        if not props.get('url'):
            props['url'] = [self.source_url]

        self.source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
        logging.info(
            f'Converted to AS1: {json_dumps(self.source_obj, indent=2)}')

        # try each delivery protocol in order; the first truthy result wins
        for method in self.try_activitypub, self.try_salmon:
            ret = method()
            if ret:
                return ret

        return ''

Example #12

0

Show file

File: original_post_discovery.py Project: tantek/bridgy

def _find_feed_items(feed_url, feed_doc):
  """Return the feed items found in a document.

  When the parsed document contains an h-feed, that feed's children are
  returned. Otherwise the document's top-level items are returned.

  Args:
    feed_url: a string. the URL passed to mf2py parser
    feed_doc: a string or BeautifulSoup object. document is passed to
      mf2py parser

  Returns:
    a list of dicts, each one representing an mf2 h-* item
  """
  mf2 = mf2py.parse(url=feed_url, doc=feed_doc)
  top_level_items = mf2['items']

  feed = mf2util.find_first_entry(mf2, ('h-feed',))
  if not feed:
    logging.debug('No h-feed found, fallback to top-level h-entrys.')
    return top_level_items

  return feed.get('children', [])

Example #13

0

Show file

File: microformats2.py Project: cacimatti/granary

def html_to_activities(html, url=None, actor=None):
    """Turn a microformats2 HTML h-feed into ActivityStreams activities.

    Items are the children of the first h-feed if one exists, otherwise the
    page's top-level items. Every converted object is flagged
    ``content_is_html``.

    Args:
      html: string HTML
      url: optional string URL that HTML came from
      actor: optional author AS actor object applied to every activity.
        usually comes from a rel="author" link.

    Returns:
      list of ActivityStreams activity dicts, each ``{'object': ...}``
    """
    mf2 = mf2py.parse(doc=html, url=url)
    feed = mf2util.find_first_entry(mf2, ['h-feed'])
    if feed:
        source_items = feed.get('children', [])
    else:
        source_items = mf2.get('items', [])

    def to_activity(item):
        converted = json_to_object(item, actor=actor)
        converted['content_is_html'] = True
        return {'object': converted}

    return [to_activity(item) for item in source_items]

Example #14

0

Show file

File: services.py Project: Lancey6/redwind

def convert_mf2util():
    # Request handler (presumably a Flask view, given request/jsonify):
    # parses the page named by the ``url`` query parameter with mf2py and
    # returns mf2util's interpretation of it as a JSON response.
    def dates_to_string(json):
        # Recursively walk dicts and lists, replacing date/datetime values
        # with their ISO-format strings; any other value is returned as-is.
        if isinstance(json, dict):
            return {k: dates_to_string(v) for (k, v) in json.items()}
        if isinstance(json, list):
            return [dates_to_string(v) for v in json]
        if isinstance(json, datetime.date) or isinstance(json, datetime.datetime):
            return json.isoformat()
        return json

    # Query parameters: page to parse and an optional feed override.
    url = request.args.get('url')
    as_feed = request.args.get('as-feed')
    if url:
        try:
            d = mf2py.parse(url=url)
            # Interpret as a feed when explicitly requested or when the page
            # contains an h-feed; otherwise interpret as a single item.
            if as_feed == 'true' or mf2util.find_first_entry(d, ['h-feed']):
                json = mf2util.interpret_feed(d, url)
            else:
                json = mf2util.interpret(d, url)
            return jsonify(dates_to_string(json))
        except:
            # Any failure is reported to the client as the exception class;
            # nothing is logged here.
            return jsonify({'error': str(sys.exc_info()[0])})

    # No ``url`` given: a literal string is returned instead.
    # NOTE(review): the string literal below is cut off in this listing.
    return """

Example #15

0

Show file

File: webmention.py Project: omphalos/bridgy-fed

    def try_activitypub(self):
        """Attempt to deliver the webmention source to its target via ActivityPub.

        Fetches the ``source`` page, converts its first h-entry to an
        ActivityStreams object, resolves the target (first in-reply-to,
        like-of, or repost-of) and its actor's inbox, then POSTs the AS2
        activity to that inbox with an HTTP Signature. The inbox's status
        code and body are copied onto ``self.response``.

        Falls back to ``self.send_salmon`` when fetching the target as AS2
        fails but the response is 2xx HTML, or when no inbox can be found.

        Returns:
          whatever ``self.send_salmon`` returns on the fallback paths,
          otherwise None.
        """
        source = util.get_required_param(self, 'source')

        # fetch source page, convert to ActivityStreams
        source_resp = common.requests_get(source)
        source_url = source_resp.url or source
        source_mf2 = mf2py.parse(source_resp.text, url=source_url)
        # logging.debug('Parsed mf2 for %s: %s', source_resp.url, json.dumps(source_mf2, indent=2))

        entry = mf2util.find_first_entry(source_mf2, ['h-entry'])
        if not entry:
            # No h-entry on the page: report it instead of crashing with an
            # AttributeError on entry.setdefault() below. common.error is
            # assumed to abort the request, as the other call sites in this
            # method already rely on.
            common.error(self, 'No microformats2 found on %s' % source_url)

        logging.info('First entry: %s', json.dumps(entry, indent=2))
        # make sure it has url, since we use that for AS2 id, which is required
        # for ActivityPub.
        props = entry.setdefault('properties', {})
        if not props.get('url'):
            props['url'] = [source_url]

        source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
        logging.info('Converted to AS: %s', json.dumps(source_obj, indent=2))

        # fetch target page as AS object. target is first in-reply-to, like-of,
        # or repost-of, *not* target query param.)
        target = util.get_url(util.get_first(source_obj, 'inReplyTo') or
                              util.get_first(source_obj, 'object'))
        if not target:
            common.error(self, 'No u-in-reply-to, u-like-of, or u-repost-of '
                         'found in %s' % source_url)

        try:
            target_resp = common.get_as2(target)
        except (requests.HTTPError, exc.HTTPBadGateway) as e:
            # a successful HTML response means the target doesn't speak AS2;
            # record the attempt and hand off to Salmon instead.
            if (e.response.status_code // 100 == 2 and
                common.content_type(e.response).startswith('text/html')):
                self.resp = Response.get_or_create(
                    source=source_url, target=e.response.url or target,
                    direction='out', source_mf2=json.dumps(source_mf2))
                return self.send_salmon(source_obj, target_resp=e.response)
            raise

        target_url = target_resp.url or target
        self.resp = Response.get_or_create(
            source=source_url, target=target_url, direction='out',
            protocol='activitypub', source_mf2=json.dumps(source_mf2))

        # find actor's inbox
        target_obj = target_resp.json()
        inbox_url = target_obj.get('inbox')

        if not inbox_url:
            # TODO: test actor/attributedTo and not, with/without inbox
            actor = target_obj.get('actor') or target_obj.get('attributedTo')
            if isinstance(actor, dict):
                inbox_url = actor.get('inbox')
                actor = actor.get('url')
            if not inbox_url and not actor:
                common.error(self, 'Target object has no actor or attributedTo URL')

        if not inbox_url:
            # fetch actor as AS object
            actor = common.get_as2(actor).json()
            inbox_url = actor.get('inbox')

        if not inbox_url:
            # TODO: probably need a way to save errors like this so that we can
            # return them if ostatus fails too.
            # common.error(self, 'Target actor has no inbox')
            return self.send_salmon(source_obj, target_resp=target_resp)

        # convert to AS2
        source_domain = urlparse.urlparse(source_url).netloc
        key = MagicKey.get_or_create(source_domain)
        source_activity = common.postprocess_as2(
            as2.from_as1(source_obj), target=target_obj, key=key)

        # a response already marked complete is re-sent as an Update
        if self.resp.status == 'complete':
            source_activity['type'] = 'Update'

        # prepare HTTP Signature (required by Mastodon)
        # https://w3c.github.io/activitypub/#authorization-lds
        # https://tools.ietf.org/html/draft-cavage-http-signatures-07
        # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
        acct = 'acct:%s@%s' % (source_domain, source_domain)
        auth = HTTPSignatureAuth(secret=key.private_pem(), key_id=acct,
                                 algorithm='rsa-sha256')

        # deliver source object to target actor's inbox.
        headers = {
            'Content-Type': common.CONTENT_TYPE_AS2,
            # required for HTTP Signature
            # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
            'Date': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
        }
        # the inbox may be a relative URL; resolve it against the target
        inbox_url = urlparse.urljoin(target_url, inbox_url)
        resp = common.requests_post(inbox_url, json=source_activity, auth=auth,
                                    headers=headers)
        self.response.status_int = resp.status_code
        if resp.status_code == 202:
            self.response.write('202 response! If this is Mastodon 1.x, their '
                                'signature verification probably failed. :(\n')
        self.response.write(resp.text)

Example #16

0

Show file

    def get(self):
        """Fetch a URL, decode it per the ``input`` format, and write activities.

        Reads the required ``input`` and ``url`` parameters, fetches the URL,
        decodes the body according to ``input`` (AS1, AS2, Atom, HTML with
        microformats2, mf2 JSON, or JSON Feed), converts it to activities and
        passes them to ``self.write_response`` along with the final URL and
        any actor, title and h-feed found in the microformats2.

        A URL fragment is only accepted with ``input=html``, where it selects
        the element with that id.

        Raises:
          exc.HTTPBadRequest: for an unknown ``input``, a fragment used with a
            non-HTML input, a body that can't be decoded or parsed, or a
            fragment that matches no element.
        """
        input = util.get_required_param(self, 'input')
        if input not in INPUTS:
            raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                     (input, INPUTS))

        orig_url = util.get_required_param(self, 'url')
        fragment = urllib.parse.urlparse(orig_url).fragment
        if fragment and input != 'html':
            raise exc.HTTPBadRequest(
                'URL fragments only supported with input=html.')

        resp = util.requests_get(orig_url, gateway=True)
        final_url = resp.url

        # decode data
        if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2',
                     'jsonfeed'):
            try:
                body_json = json_loads(resp.text)
                # accept a bare list, an object with 'items', or one object
                body_items = (body_json if isinstance(body_json, list) else
                              body_json.get('items') or [body_json])
            except (TypeError, ValueError):
                raise exc.HTTPBadRequest('Could not decode %s as JSON' %
                                         final_url)

        mf2 = None
        if input == 'html':
            mf2 = util.parse_mf2(resp, id=fragment)
            # Test the fragment, not the builtin ``id`` (which is always
            # truthy and would reject every page that parses to empty mf2).
            if fragment and not mf2:
                raise exc.HTTPBadRequest(
                    'Got fragment %s but no element found with that id.' %
                    fragment)
        elif input in ('mf2-json', 'json-mf2'):
            mf2 = body_json
            if not hasattr(mf2, 'get'):
                raise exc.HTTPBadRequest(
                    'Expected microformats2 JSON input to be dict, got %s' %
                    mf2.__class__.__name__)
            mf2.setdefault('rels', {})  # mf2util expects rels

        actor = None
        title = None
        hfeed = None
        if mf2:

            def fetch_mf2_func(url):
                # For URLs on silo domains, return a stub h-card instead of
                # fetching the page.
                if util.domain_or_parent_in(
                        urllib.parse.urlparse(url).netloc, SILO_DOMAINS):
                    return {
                        'items': [{
                            'type': ['h-card'],
                            'properties': {
                                'url': [url]
                            }
                        }]
                    }
                return util.fetch_mf2(url, gateway=True)

            try:
                actor = microformats2.find_author(
                    mf2, fetch_mf2_func=fetch_mf2_func)
                title = microformats2.get_title(mf2)
                hfeed = mf2util.find_first_entry(mf2, ['h-feed'])
            except (KeyError, ValueError) as e:
                raise exc.HTTPBadRequest('Could not parse %s as %s: %s' %
                                         (final_url, input, e))

        try:
            if input in ('as1', 'activitystreams'):
                activities = body_items
            elif input == 'as2':
                activities = [as2.to_as1(obj) for obj in body_items]
            elif input == 'atom':
                try:
                    activities = atom.atom_to_activities(resp.text)
                except ElementTree.ParseError as e:
                    raise exc.HTTPBadRequest('Could not parse %s as XML: %s' %
                                             (final_url, e))
                except ValueError as e:
                    raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' %
                                             (final_url, e))
            elif input == 'html':
                activities = microformats2.html_to_activities(resp,
                                                              url=final_url,
                                                              id=fragment,
                                                              actor=actor)
            elif input in ('mf2-json', 'json-mf2'):
                activities = [
                    microformats2.json_to_object(item, actor=actor)
                    for item in mf2.get('items', [])
                ]
            elif input == 'jsonfeed':
                activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
        except ValueError as e:
            logging.warning('parsing input failed', stack_info=True)
            self.abort(
                400,
                'Could not parse %s as %s: %s' % (final_url, input, str(e)))

        self.write_response(
            source.Source.make_activities_base_response(activities),
            url=final_url,
            actor=actor,
            title=title,
            hfeed=hfeed)

Example #17

0

Show file

File: app.py Project: snarfed/granary

  def get(self):
    """Fetch a URL, decode it per the ``input`` format, and write activities.

    Reads the required ``input`` and ``url`` parameters, fetches the URL,
    decodes the body according to ``input`` (AS1, AS2, Atom, HTML with
    microformats2, mf2 JSON, or JSON Feed), converts it to activities and
    passes them to ``self.write_response`` along with the URL and any actor,
    title and h-feed found in the microformats2.

    Raises:
      exc.HTTPBadRequest: for an unknown ``input`` or a body that can't be
        decoded or parsed in the requested format.
    """
    input = util.get_required_param(self, 'input')
    if input not in INPUTS:
      raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                               (input, INPUTS))
    url, body = self._fetch(util.get_required_param(self, 'url'))

    # decode data
    if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2', 'jsonfeed'):
      try:
        body_json = json.loads(body)
        # accept a bare list, an object with 'items', or a single object
        body_items = (body_json if isinstance(body_json, list)
                      else body_json.get('items') or [body_json])
      except (TypeError, ValueError):
        raise exc.HTTPBadRequest('Could not decode %s as JSON' % url)

    # obtain parsed microformats2, either from HTML or directly from JSON
    mf2 = None
    if input == 'html':
      mf2 = mf2py.parse(doc=body, url=url, img_with_alt=True)
    elif input in ('mf2-json', 'json-mf2'):
      mf2 = body_json
      if not hasattr(mf2, 'get'):
        raise exc.HTTPBadRequest(
          'Expected microformats2 JSON input to be dict, got %s' %
          mf2.__class__.__name__)
      mf2.setdefault('rels', {})  # mf2util expects rels

    actor = None
    title = None
    hfeed = None
    if mf2:
      def fetch_mf2_func(url):
        # For URLs on silo domains, return a stub h-card instead of fetching.
        if util.domain_or_parent_in(urlparse.urlparse(url).netloc, SILO_DOMAINS):
          return {'items': [{'type': ['h-card'], 'properties': {'url': [url]}}]}
        _, doc = self._fetch(url)
        return mf2py.parse(doc=doc, url=url, img_with_alt=True)

      try:
        actor = microformats2.find_author(mf2, fetch_mf2_func=fetch_mf2_func)
        title = microformats2.get_title(mf2)
        hfeed = mf2util.find_first_entry(mf2, ['h-feed'])
      except (KeyError, ValueError) as e:
        raise exc.HTTPBadRequest('Could not parse %s as %s: %s' % (url, input, e))

    # convert the decoded input to activities, one branch per input format
    try:
      if input in ('as1', 'activitystreams'):
        activities = body_items
      elif input == 'as2':
        activities = [as2.to_as1(obj) for obj in body_items]
      elif input == 'atom':
        try:
          activities = atom.atom_to_activities(body)
        except ElementTree.ParseError as e:
          raise exc.HTTPBadRequest('Could not parse %s as XML: %s' % (url, e))
        except ValueError as e:
          raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' % (url, e))
      elif input == 'html':
        activities = microformats2.html_to_activities(body, url, actor)
      elif input in ('mf2-json', 'json-mf2'):
        activities = [microformats2.json_to_object(item, actor=actor)
                      for item in mf2.get('items', [])]
      elif input == 'jsonfeed':
        # JSON Feed conversion also supplies the actor
        activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
    except ValueError as e:
      logging.warning('parsing input failed', exc_info=True)
      self.abort(400, 'Could not parse %s as %s: %s' % (url, input, str(e)))

    self.write_response(source.Source.make_activities_base_response(activities),
                        url=url, actor=actor, title=title, hfeed=hfeed)

Example #18

0

Show file

File: app.py Project: davidp94/granary

    def get(self):
        """Fetch a URL, decode it per the ``input`` format, and write activities.

        Reads the required ``input`` and ``url`` parameters, fetches the URL,
        decodes the body according to ``input`` (AS1, AS2, Atom, HTML with
        microformats2, mf2 JSON, or JSON Feed), converts it to activities and
        passes them to ``self.write_response`` along with the URL and any
        actor, title and h-feed found in the microformats2.

        Raises:
          exc.HTTPBadRequest: for an unknown ``input`` or a body that can't be
            decoded or parsed in the requested format.
        """
        input = util.get_required_param(self, 'input')
        if input not in INPUTS:
            raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                     (input, INPUTS))
        url, body = self._fetch(util.get_required_param(self, 'url'))

        # decode data
        if input in ('activitystreams', 'as1', 'as2', 'mf2-json', 'json-mf2',
                     'jsonfeed'):
            try:
                body_json = json.loads(body)
                # accept a bare list, an object with 'items', or one object
                body_items = (body_json if isinstance(body_json, list) else
                              body_json.get('items') or [body_json])
            except (TypeError, ValueError):
                raise exc.HTTPBadRequest('Could not decode %s as JSON' % url)

        # obtain parsed microformats2, either from HTML or directly from JSON
        mf2 = None
        if input == 'html':
            mf2 = mf2py.parse(doc=body, url=url, img_with_alt=True)
        elif input in ('mf2-json', 'json-mf2'):
            mf2 = body_json
            if not hasattr(mf2, 'get'):
                raise exc.HTTPBadRequest(
                    'Expected microformats2 JSON input to be dict, got %s' %
                    mf2.__class__.__name__)
            mf2.setdefault('rels', {})  # mf2util expects rels

        actor = None
        title = None
        hfeed = None
        if mf2:

            def fetch_mf2_func(url):
                # For URLs on silo domains, return a stub h-card instead of
                # fetching the page.
                if util.domain_or_parent_in(
                        urlparse.urlparse(url).netloc, SILO_DOMAINS):
                    return {
                        'items': [{
                            'type': ['h-card'],
                            'properties': {
                                'url': [url]
                            }
                        }]
                    }
                _, doc = self._fetch(url)
                return mf2py.parse(doc=doc, url=url, img_with_alt=True)

            try:
                actor = microformats2.find_author(
                    mf2, fetch_mf2_func=fetch_mf2_func)
                title = microformats2.get_title(mf2)
                hfeed = mf2util.find_first_entry(mf2, ['h-feed'])
            except (KeyError, ValueError) as e:
                raise exc.HTTPBadRequest('Could not parse %s as %s: %s' %
                                         (url, input, e))

        # convert the decoded input to activities, one branch per format
        try:
            if input in ('as1', 'activitystreams'):
                activities = body_items
            elif input == 'as2':
                activities = [as2.to_as1(obj) for obj in body_items]
            elif input == 'atom':
                try:
                    activities = atom.atom_to_activities(body)
                except ElementTree.ParseError as e:
                    raise exc.HTTPBadRequest('Could not parse %s as XML: %s' %
                                             (url, e))
                except ValueError as e:
                    raise exc.HTTPBadRequest('Could not parse %s as Atom: %s' %
                                             (url, e))
            elif input == 'html':
                activities = microformats2.html_to_activities(body, url, actor)
            elif input in ('mf2-json', 'json-mf2'):
                activities = [
                    microformats2.json_to_object(item, actor=actor)
                    for item in mf2.get('items', [])
                ]
            elif input == 'jsonfeed':
                # JSON Feed conversion also supplies the actor
                activities, actor = jsonfeed.jsonfeed_to_activities(body_json)
        except ValueError as e:
            logging.warning('parsing input failed', exc_info=True)
            self.abort(400,
                       'Could not parse %s as %s: %s' % (url, input, str(e)))

        self.write_response(
            source.Source.make_activities_base_response(activities),
            url=url,
            actor=actor,
            title=title,
            hfeed=hfeed)

Example #19

0

Show file

File: microformats.py Project: canopy/understory

def interpret_entry(
    parsed,
    source_url,
    base_href=None,
    hentry=None,
    use_rel_syndication=True,
    want_json=False,
    fetch_mf2_func=None,
):
    """
    Build a flat dictionary describing the h-entry in a parsed document.

    The result starts from ``_interpret_common_properties`` and is then
    extended here, giving a dictionary of this general shape:

        {'type': 'entry' (or 'cite' for an h-cite),
         'url': permalink of the document (may be different than source_url),
         'published': datetime or date,
         'updated': datetime or date,
         'name': title of the entry,
         'content': body of entry (contains HTML),
         'author': {
          'name': author name,
          'url': author url,
          'photo': author photo
         },
         'syndication': [
           'syndication url',
           ...
         ],
         'in-reply-to': [...],
         'like-of': [...],
         'repost-of': [...]}

    The ``category``, ``pubkey`` and ``vote`` properties are copied through
    verbatim when present. Each reference property (in-reply-to, like-of,
    repost-of, bookmark-of, vote-on, comment, like, repost) becomes a list
    whose elements are either an interpreted nested item or ``{'url': ...}``.

    :param dict parsed: the result of parsing a document containing mf2 markup
    :param str source_url: the URL of the parsed document, used by the
      authorship algorithm
    :param str base_href: (optional) the href value of the base tag
    :param dict hentry: (optional) the item in the above document
      representing the h-entry. if provided, the lookup with
      find_first_entry is skipped
    :param boolean use_rel_syndication: (optional, default True) Whether
      to include rel=syndication in the list of syndication sources. Sometimes
      useful to set this to False when parsing h-feeds that erroneously include
      rel=syndication on each entry.
    :param boolean want_json: (optional, default False) if true, the result
      will be pure json with datetimes as strings instead of python objects
    :param callable fetch_mf2_func: (optional) function to fetch mf2 parsed
      output for a given URL.
    :return: a dict with some or all of the described properties; an empty
      dict when no h-entry is given or found

    """
    # locate the h-entry ourselves unless the caller supplied one
    if not hentry:
        hentry = util.find_first_entry(parsed, ["h-entry"])
    if not hentry:
        return {}

    result = _interpret_common_properties(
        parsed,
        source_url,
        base_href,
        hentry,
        use_rel_syndication,
        want_json,
        fetch_mf2_func,
    )
    is_cite = "h-cite" in hentry.get("type", [])
    result["type"] = "cite" if is_cite else "entry"

    props = hentry["properties"]

    # NOTE patch: extra properties passed through untouched
    for passthrough in ("category", "pubkey", "vote"):
        if passthrough in props:
            result[passthrough] = props[passthrough]

    title = util.get_plain_text(props.get("name"))
    if title and util.is_name_a_title(title, result.get("content-plain")):
        result["name"] = title

    reference_props = (
        "in-reply-to",
        "like-of",
        "repost-of",
        "bookmark-of",
        "vote-on",  # NOTE added vote-on
        "comment",
        "like",
        "repost",
    )
    for prop in reference_props:
        for url_val in props.get(prop, []):
            bucket = result.setdefault(prop, [])
            if not isinstance(url_val, dict):
                # plain value: wrap it as a bare reference
                bucket.append({"url": url_val})
                continue
            # nested item: interpret it recursively
            bucket.append(
                util.interpret(
                    parsed,
                    source_url,
                    base_href,
                    url_val,
                    use_rel_syndication=False,
                    want_json=want_json,
                    fetch_mf2_func=fetch_mf2_func,
                )
            )

    return result

Example #20

0

Show file

File: unfurl.py Project: drivet/indieweb-utils

def fetch_post_type(parsed):
    """Return the post type of the first h-entry in *parsed*.

    Falls back to ``'note'`` when the parsed document has no h-entry.
    """
    first_entry = mf2util.find_first_entry(parsed, ['h-entry'])
    if not first_entry:
        return 'note'
    return mf2util.post_type_discovery(first_entry)