Ejemplo n.º 1
0
def _find_feed_items(mf2):
    """Collect the feed entries contained in parsed microformats2 data.

    When mf2util.find_all_entries reports any h-feed items, the result is the
    concatenation of their 'children' lists. Otherwise the top-level items
    are used unchanged. At most MAX_FEED_ENTRIES entries are returned.

    Args:
      mf2: dict, parsed mf2 data

    Returns: list of dicts, each one representing an mf2 h-* item
    """
    top_level = mf2['items']
    feeds = mf2util.find_all_entries(mf2, ('h-feed', ))

    if not feeds:
        logger.debug('No h-feed found, fallback to top-level h-entrys.')
        entries = top_level
    else:
        # Flatten the children of every h-feed into a single list; an h-feed
        # without a 'children' key contributes nothing.
        entries = [
            child for feed in feeds for child in feed.get('children', [])
        ]

    if len(entries) <= MAX_FEED_ENTRIES:
        return entries

    # Too many entries: report the full count, then keep only the leading ones.
    logger.info(
        f'Feed has {len(entries)} entries! only processing the first {MAX_FEED_ENTRIES}.'
    )
    return entries[:MAX_FEED_ENTRIES]
Ejemplo n.º 2
0
def _find_feed_items(feed_url, feed_doc):
  """Parse a document with mf2py and return its feed entries.

  If the parsed data contains any h-feed items, their 'children' are
  concatenated and returned. Otherwise the top-level items are returned.
  The result is limited to the first MAX_FEED_ENTRIES entries.

  Args:
    feed_url: a string. the URL passed to mf2py parser
    feed_doc: a string or BeautifulSoup object. document is passed to
      mf2py parser

  Returns:
    a list of dicts, each one representing an mf2 h-* item
  """
  mf2 = util.mf2py_parse(feed_doc, feed_url)
  top_level = mf2['items']
  feeds = mf2util.find_all_entries(mf2, ('h-feed',))

  if not feeds:
    logging.debug('No h-feed found, fallback to top-level h-entrys.')
    entries = top_level
  else:
    # Flatten every h-feed's children into one list.
    entries = [
      child for feed in feeds for child in feed.get('children', [])
    ]

  if len(entries) <= MAX_FEED_ENTRIES:
    return entries

  # Over the cap: log the real size, then truncate.
  logging.info('%s has %s entries! only processing the first %s.',
               feed_url, len(entries), MAX_FEED_ENTRIES)
  return entries[:MAX_FEED_ENTRIES]
Ejemplo n.º 3
0
def _find_feed_items(feed_url, feed_doc):
    """Return the feed entries found in a document parsed with mf2py.

    The children of every h-feed found in the parsed data are gathered into
    one list. When no h-feed is found, the top-level items are used instead.
    Only the first MAX_FEED_ENTRIES entries are kept.

    Args:
      feed_url: a string. the URL passed to mf2py parser
      feed_doc: a string or BeautifulSoup object. document is passed to
        mf2py parser

    Returns:
      a list of dicts, each one representing an mf2 h-* item
    """
    mf2 = util.mf2py_parse(feed_doc, feed_url)

    found = mf2['items']
    feeds = mf2util.find_all_entries(mf2, ('h-feed', ))
    if feeds:
        # Replace the top-level items with the accumulated h-feed children.
        found = []
        for feed in feeds:
            found.extend(feed.get('children', []))
    else:
        logging.debug('No h-feed found, fallback to top-level h-entrys.')

    total = len(found)
    if total > MAX_FEED_ENTRIES:
        logging.info('%s has %s entries! only processing the first %s.',
                     feed_url, total, MAX_FEED_ENTRIES)
        found = found[:MAX_FEED_ENTRIES]

    return found
Ejemplo n.º 4
0
def _find_feed_items(feed_url, feed_doc):
  """Parse a document with mf2py and return its feed entries.

  If the parsed data contains any h-feed items, the concatenation of their
  'children' is returned. Otherwise the top-level items are returned.

  Args:
    feed_url: a string. the URL passed to mf2py parser
    feed_doc: a string or BeautifulSoup object. document is passed to
      mf2py parser

  Returns:
    a list of dicts, each one representing an mf2 h-* item
  """
  mf2 = mf2py.parse(url=feed_url, doc=feed_doc)
  top_level = mf2['items']
  feeds = mf2util.find_all_entries(mf2, ('h-feed',))

  if not feeds:
    logging.debug('No h-feed found, fallback to top-level h-entrys.')
    return top_level

  # Flatten every h-feed's children into one list; an h-feed without a
  # 'children' key contributes nothing.
  return [child for feed in feeds for child in feed.get('children', [])]
Ejemplo n.º 5
0
def representative_card(parsed, source_url):
    """
    Find the representative card for a URL.

    http://microformats.org/wiki/representative-h-card-parsing

    :param dict parsed: an mf2 parsed dict
    :param str source_url: the source of the parsed document.
    :return: the representative h-card if one is found

    """
    # FIXME NOTE mf2py needs plain mf2json
    parsed = json.loads(JSONEncoder().encode(parsed))  # FIXME
    source_url = uri.parse(source_url).minimized
    cards = [
        h
        for h in util.find_all_entries(parsed, ["h-card"], include_properties=True)
        if (
            h["properties"].get("name", [""])[0]
            or h["properties"].get("nickname", [""])[0]
        )
    ]
    if match := _check_uid_and_url_match_source_url(source_url, cards):
        return match