def test_parse_datetimes():
    """parse_datetime handles timezone-aware ISO 8601 datetime strings."""

    def assert_with_tz(dt, naive, offset):
        # Fixed docstring: the original claimed this "returns a tuple";
        # it returns nothing and asserts instead.
        """Assert that dt's naive part and UTC offset match the expected values."""
        assert naive == dt.replace(tzinfo=None)
        assert offset == dt.utcoffset()

    # waterpigs.co.uk -- utc time
    assert_with_tz(mf2util.parse_datetime('2014-05-10T10:48:28+00:00'),
                   datetime(2014, 5, 10, 10, 48, 28), timedelta(hours=0))
    # same as above with Zulu time
    assert_with_tz(mf2util.parse_datetime('2014-05-10T10:48:28Z'),
                   datetime(2014, 5, 10, 10, 48, 28), timedelta(hours=0))
    # snarfed.org -- pacific time
    assert_with_tz(mf2util.parse_datetime('2014-05-05T09:59:08-07:00'),
                   datetime(2014, 5, 5, 9, 59, 8), timedelta(hours=-7))
    # same as above, no colon in tz
    assert_with_tz(mf2util.parse_datetime('2014-05-05T09:59:08-0700'),
                   datetime(2014, 5, 5, 9, 59, 8), timedelta(hours=-7))
    with pytest.raises(ValueError):
        # cannot read timezones by name
        mf2util.parse_datetime('2013-07-04T11:22 PST')
def test_parse_datetimes():
    """Timezone-aware datetime strings parse with the correct UTC offset."""

    def assert_with_tz(dt, naive, offset):
        """Check dt against an expected naive datetime and timedelta offset.

        (Docstring corrected: this helper asserts; it does not return a tuple.)
        """
        assert naive == dt.replace(tzinfo=None)
        assert offset == dt.utcoffset()

    # waterpigs.co.uk -- utc time
    assert_with_tz(mf2util.parse_datetime('2014-05-10T10:48:28+00:00'),
                   datetime(2014, 5, 10, 10, 48, 28), timedelta(hours=0))
    # same as above with Zulu time
    assert_with_tz(mf2util.parse_datetime('2014-05-10T10:48:28Z'),
                   datetime(2014, 5, 10, 10, 48, 28), timedelta(hours=0))
    # snarfed.org -- pacific time
    assert_with_tz(mf2util.parse_datetime('2014-05-05T09:59:08-07:00'),
                   datetime(2014, 5, 5, 9, 59, 8), timedelta(hours=-7))
    # same as above, no colon in tz
    assert_with_tz(mf2util.parse_datetime('2014-05-05T09:59:08-0700'),
                   datetime(2014, 5, 5, 9, 59, 8), timedelta(hours=-7))
    with pytest.raises(ValueError):
        # cannot read timezones by name
        mf2util.parse_datetime('2013-07-04T11:22 PST')
def test_parse_dates():
    """Bare ISO-style dates parse to date objects; malformed ones raise."""
    valid_cases = [
        ('2014-04-27', date(2014, 4, 27)),
        ('2014-9-2', date(2014, 9, 2)),
        ('1982-11-24', date(1982, 11, 24)),
    ]
    for text, expected in valid_cases:
        assert mf2util.parse_datetime(text) == expected

    with pytest.raises(ValueError):
        # day/month switched
        mf2util.parse_datetime('2014-24-11')
    with pytest.raises(ValueError):
        # 2-character year
        mf2util.parse_datetime('14-09-27')
def test_parse_dates():
    """Date-only strings become datetime.date; invalid strings raise ValueError."""
    assert date(2014, 4, 27) == mf2util.parse_datetime('2014-04-27')
    assert date(2014, 9, 2) == mf2util.parse_datetime('2014-9-2')
    assert date(1982, 11, 24) == mf2util.parse_datetime('1982-11-24')

    for bad_input in ('2014-24-11',   # day/month switched
                      '14-09-27'):    # 2-character year
        with pytest.raises(ValueError):
            mf2util.parse_datetime(bad_input)
def test_parse_datetimes_no_tz():
    """Datetime strings without a timezone parse to naive datetimes."""
    expectations = [
        # tantek.com -- no seconds, no timezone
        ('2014-05-09T17:53', datetime(2014, 5, 9, 17, 53)),
        # same as above without 'T'
        ('2014-05-09 17:53', datetime(2014, 5, 9, 17, 53)),
        # Homebrew Website Club
        ('2014-04-23T18:30', datetime(2014, 4, 23, 18, 30)),
    ]
    for text, expected in expectations:
        assert mf2util.parse_datetime(text) == expected

    with pytest.raises(ValueError):
        # hour only
        mf2util.parse_datetime('2012-09-01T12')
    with pytest.raises(ValueError):
        # invalid hour minute
        mf2util.parse_datetime('2014-04-23T30:90')
def _populate_cache_for_url(url, ctx):
    """Parse the microformats2 entries at `url` and record their URLs in the cache.

    For each h-entry found, the entry's URL is added to the cache for every
    silo selected by the command-line arguments. When `ctx.args.until` is
    given, entries published strictly after that date are skipped.
    """
    import mf2util
    import dateutil.parser

    silos = get_named_silos(ctx.silos, ctx.args.silo)

    # Optional cutoff date: entries published after it are excluded.
    until_dt = None
    if ctx.args.until:
        until_dt = dateutil.parser.parse(ctx.args.until).date()
        # Use lazy %-style logger args instead of eager string formatting.
        logger.debug("Populating cache until: %s", until_dt)

    mf_obj = parse_mf2(url)
    mf_dict = mf_obj.to_dict()
    for entry in mf_dict.get('items', []):
        entry_props = entry.get('properties')
        if not entry_props:
            logger.warning("Found entry without any properties.")
            continue

        entry_url = entry_props.get('url')
        if not entry_url:
            logger.warning("Found entry without any URL.")
            continue
        # mf2 properties are usually lists; take the first URL.
        if isinstance(entry_url, list):
            entry_url = entry_url[0]

        if until_dt:
            entry_published = entry_props.get('published')
            if not entry_published:
                # Typo fixed: was "has not published date."
                logger.warning("Entry '%s' has no published date.", entry_url)
                continue
            if isinstance(entry_published, list):
                entry_published = entry_published[0]
            entry_published_dt = mf2util.parse_datetime(entry_published)
            # Skip entries newer than the cutoff date.
            if entry_published_dt and entry_published_dt.date() > until_dt:
                continue

        logger.debug("Adding entry to cache: %s", entry_url)
        for silo in silos:
            ctx.cache.addPost(silo.name, entry_url)
def test_parse_datetimes_no_tz():
    """Strings lacking timezone info parse to naive datetime objects."""
    hwc = datetime(2014, 4, 23, 18, 30)
    tantek = datetime(2014, 5, 9, 17, 53)

    # tantek.com -- no seconds, no timezone
    assert tantek == mf2util.parse_datetime('2014-05-09T17:53')
    # same as above without 'T'
    assert tantek == mf2util.parse_datetime('2014-05-09 17:53')
    # Homebrew Website Club
    assert hwc == mf2util.parse_datetime('2014-04-23T18:30')

    with pytest.raises(ValueError):
        # hour only
        mf2util.parse_datetime('2012-09-01T12')
    with pytest.raises(ValueError):
        # invalid hour minute
        mf2util.parse_datetime('2014-04-23T30:90')
def from_activities(activities, actor=None, title=None, feed_url=None,
                    home_page_url=None, hfeed=None):
    """Converts ActivityStreams activities to an RSS 2.0 feed.

    Args:
      activities: sequence of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed title
      feed_url: string, the URL for this RSS feed
      home_page_url: string, the home page URL
      hfeed: dict, parsed mf2 h-feed, if available

    Returns:
      unicode string with RSS 2.0 XML
    """
    # Reject non-iterables and the iterable-but-wrong dict/str cases up front.
    try:
        iter(activities)
    except TypeError:
        raise TypeError('activities must be iterable')

    if isinstance(activities, (dict, str)):
        raise TypeError('activities may not be a dict or string')

    fg = FeedGenerator()
    fg.id(feed_url)
    assert feed_url
    fg.link(href=feed_url, rel='self')
    if home_page_url:
        fg.link(href=home_page_url, rel='alternate')
    # TODO: parse language from lang attribute:
    # https://github.com/microformats/mf2py/issues/150
    fg.language('en')
    fg.generator('granary', uri='https://granary.io/')

    hfeed = hfeed or {}
    actor = actor or {}
    # Feed image: prefer the h-feed's photo, fall back to the actor's image.
    image = (util.get_url(hfeed.get('properties', {}), 'photo') or
             util.get_url(actor, 'image'))
    if image:
        fg.image(image)

    props = hfeed.get('properties') or {}
    content = microformats2.get_text(util.get_first(props, 'content', ''))
    summary = util.get_first(props, 'summary', '')
    # RSS requires description and title; '-' is the placeholder fallback.
    desc = content or summary or '-'
    fg.description(desc)  # required
    fg.title(title or util.ellipsize(desc))  # required

    latest = None
    feed_has_enclosure = False
    for activity in activities:
        obj = activity.get('object') or activity
        # Skip actor-only entries; they are not feed items.
        if obj.get('objectType') == 'person':
            continue

        item = fg.add_entry()
        url = obj.get('url')
        id = obj.get('id') or url
        item.id(id)
        item.link(href=url)
        item.guid(url, permalink=True)

        # title (required)
        title = (obj.get('title') or obj.get('displayName') or
                 util.ellipsize(obj.get('content', '-')))
        # strip HTML tags
        title = util.parse_html(title).get_text('').strip()
        item.title(title)

        content = microformats2.render_content(
            obj, include_location=True, render_attachments=True,
            render_image=True)
        if not content:
            content = obj.get('summary')
        if content:
            item.content(content, type='CDATA')

        # Tags become RSS categories; reaction/share verbs and
        # article/person/mention objects are filtered out.
        categories = [
            {'term': t['displayName']} for t in obj.get('tags', [])
            if t.get('displayName') and
            t.get('verb') not in ('like', 'react', 'share') and
            t.get('objectType') not in ('article', 'person', 'mention')
        ]
        item.category(categories)

        author = obj.get('author', {})
        # '-' placeholder email: some RSS consumers require a non-empty value.
        author = {
            'name': author.get('displayName') or author.get('username'),
            'uri': author.get('url'),
            'email': author.get('email') or '-',
        }
        item.author(author)

        published = obj.get('published') or obj.get('updated')
        if published and isinstance(published, str):
            try:
                dt = mf2util.parse_datetime(published)
                # parse_datetime may return a bare date; promote to datetime.
                if not isinstance(dt, datetime):
                    dt = datetime.combine(dt, time.min)
                # feedgen requires timezone-aware datetimes; assume UTC.
                if not dt.tzinfo:
                    dt = dt.replace(tzinfo=util.UTC)
                item.published(dt)
                # Track the newest timestamp for lastBuildDate below.
                if not latest or dt > latest:
                    latest = dt
            except ValueError:  # bad datetime string
                pass

        item_has_enclosure = False
        for att in obj.get('attachments', []):
            stream = util.get_first(att, 'stream') or att
            if not stream:
                continue

            url = stream.get('url') or ''
            mime = mimetypes.guess_type(url)[0] or ''
            if (att.get('objectType') in ENCLOSURE_TYPES or
                    mime and mime.split('/')[0] in ENCLOSURE_TYPES):
                # RSS allows at most one enclosure per item; keep the first.
                if item_has_enclosure:
                    logging.info(
                        'Warning: item %s already has an RSS enclosure, '
                        'skipping additional enclosure %s', id, url)
                    continue

                item_has_enclosure = feed_has_enclosure = True
                item.enclosure(url=url, type=mime,
                               length=str(stream.get('size', '')))
                item.load_extension('podcast')
                duration = stream.get('duration')
                if duration:
                    item.podcast.itunes_duration(duration)

    if feed_has_enclosure:
        fg.load_extension('podcast')
        fg.podcast.itunes_author(
            actor.get('displayName') or actor.get('username'))
        if summary:
            fg.podcast.itunes_summary(summary)
        fg.podcast.itunes_explicit('no')
        fg.podcast.itunes_block(False)
        # NOTE(review): `author` and `categories` here hold the values from
        # the LAST loop iteration; if `activities` yields no items this
        # raises NameError. Looks unintentional -- confirm with the authors.
        name = author.get('name')
        if name:
            fg.podcast.itunes_author(name)
        if image:
            fg.podcast.itunes_image(image)
        fg.podcast.itunes_category(categories)

    if latest:
        fg.lastBuildDate(latest)

    return fg.rss_str(pretty=True).decode('utf-8')
def test_none():
    """A None input passes straight through without raising."""
    result = mf2util.parse_datetime(None)
    assert result is None
def _interpret_common_properties(
    parsed,
    source_url,
    base_href,
    hentry,
    use_rel_syndication,
    want_json,
    fetch_mf2_func,
):
    """Build a result dict from the properties shared by all h-* types.

    Extracts plain-text properties, dates, author, content, summary,
    location, syndication links, checkin, and categories from `hentry`.

    Args:
        parsed: the full parsed mf2 document
        source_url: URL the document was fetched from
        base_href: base for resolving relative paths in content
        hentry: the h-entry (or similar) microformat dict
        use_rel_syndication: also include rel=syndication links if True
        want_json: keep dates as strings instead of parsing them if True
        fetch_mf2_func: callback used by find_author to fetch more mf2

    Returns:
        dict of interpreted properties
    """
    result = {}
    props = hentry["properties"]

    # BUG FIX: the original tuple was ("url", "uid", "photo", "featured" "logo");
    # the missing comma concatenated the last two literals into "featuredlogo",
    # so neither "featured" nor "logo" was ever extracted.
    for prop in ("url", "uid", "photo", "featured", "logo"):
        value = util.get_plain_text(props.get(prop))
        if value:
            result[prop] = value

    for prop in ("start", "end", "published", "updated", "deleted"):
        date_str = util.get_plain_text(props.get(prop))
        if date_str:
            if want_json:
                # JSON output keeps the raw string.
                result[prop] = date_str
            else:
                result[prop + "-str"] = date_str
                try:
                    date = util.parse_datetime(date_str)
                    if date:
                        result[prop] = date
                except ValueError:
                    # logging.warn is a deprecated alias; use warning().
                    util.logging.warning("Failed to parse datetime %s", date_str)

    author = util.find_author(parsed, source_url, hentry, fetch_mf2_func)
    if author:
        result["author"] = author

    content_prop = props.get("content")
    content_value = None
    if content_prop:
        if isinstance(content_prop[0], dict):
            content_html = content_prop[0].get("html", "").strip()
            content_value = content_prop[0].get("value", "").strip()
        else:
            content_value = content_html = content_prop[0]
        result["content"] = util.convert_relative_paths_to_absolute(
            source_url, base_href, content_html
        )
        result["content-plain"] = content_value

    summary_prop = props.get("summary")
    if summary_prop:
        if isinstance(summary_prop[0], dict):
            result["summary"] = summary_prop[0]["value"]
        else:
            result["summary"] = summary_prop[0]

    # Collect location objects, then follow this algorithm to consolidate
    # their properties:
    # //indieweb.org/location#How_to_determine_the_location_of_a_microformat
    location_stack = [props]

    for prop in "location", "adr":
        vals = props.get(prop)
        if vals:
            if isinstance(vals[0], util.string_type):
                location_stack.append({"name": vals})
            else:
                location_stack.append(vals[0].get("properties", {}))

    geo = props.get("geo")
    if geo:
        if isinstance(geo[0], dict):
            location_stack.append(geo[0].get("properties", {}))
        else:
            if geo[0].startswith("geo:"):
                # a geo: URL. try to parse it.
                # //tools.ietf.org/html/rfc5870
                parts = geo[0][len("geo:") :].split(";")[0].split(",")
                if len(parts) >= 2:
                    location_stack.append(
                        {
                            "latitude": [parts[0]],
                            "longitude": [parts[1]],
                            "altitude": [parts[2]] if len(parts) >= 3 else [],
                        }
                    )

    # NOTE(review): no `break` here, so later stack entries overwrite earlier
    # ones -- confirm this priority order is intended.
    for prop in util.LOCATION_PROPERTIES:
        for obj in location_stack:
            if obj and obj.get(prop) and not (obj == props and prop == "name"):
                result.setdefault("location", {})[prop] = obj[prop][0]

    if use_rel_syndication:
        # Merge rel=syndication links with u-syndication properties.
        result["syndication"] = list(
            set(
                parsed.get("rels", {}).get("syndication", [])
                + hentry["properties"].get("syndication", [])
            )
        )
    else:
        result["syndication"] = hentry["properties"].get("syndication", [])

    # TODO patch start
    checkin_prop = props.get("checkin")
    if checkin_prop:
        if isinstance(checkin_prop[0], dict):
            # BUG FIX: the original rebound `props` here, which made the
            # category lookup below read the checkin's properties instead
            # of the entry's whenever a checkin was present.
            checkin_props = checkin_prop[0]["properties"]
            result["checkin"] = {"name": checkin_props["name"][0]}
            try:
                result.update(
                    {
                        "latitude": checkin_props["latitude"][0],
                        "longitude": checkin_props["longitude"][0],
                    }
                )
            except KeyError:
                pass
        else:
            result["checkin"] = checkin_prop[0]

    categories = props.get("category")
    if categories:
        result["category"] = categories
    # TODO patch end

    return result
def from_activities(activities, actor=None, title=None, feed_url=None,
                    home_page_url=None, hfeed=None):
    """Converts ActivityStreams activities to an RSS 2.0 feed.

    Args:
      activities: sequence of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed title
      feed_url: string, the URL for this RSS feed
      home_page_url: string, the home page URL
      hfeed: dict, parsed mf2 h-feed, if available

    Returns:
      unicode string with RSS 2.0 XML
    """
    # Reject non-iterables, and the iterable-but-wrong dict/string cases.
    # NOTE(review): `basestring` only exists on Python 2 -- this variant
    # appears to predate a Python 3 port; confirm the target runtime.
    try:
        iter(activities)
    except TypeError:
        raise TypeError('activities must be iterable')

    if isinstance(activities, (dict, basestring)):
        raise TypeError('activities may not be a dict or string')

    fg = FeedGenerator()
    fg.id(feed_url)
    assert feed_url
    fg.link(href=feed_url, rel='self')
    if home_page_url:
        fg.link(href=home_page_url, rel='alternate')
    # TODO: parse language from lang attribute:
    # https://github.com/microformats/mf2py/issues/150
    fg.language('en')
    fg.generator('granary', uri='https://granary.io/')

    hfeed = hfeed or {}
    actor = actor or {}
    # Feed image: prefer the h-feed image, fall back to the actor's image.
    image = util.get_url(hfeed, 'image') or util.get_url(actor, 'image')
    if image:
        fg.image(image)

    props = hfeed.get('properties') or {}
    content = microformats2.get_text(util.get_first(props, 'content', ''))
    summary = util.get_first(props, 'summary', '')
    # RSS requires description and title; '-' is the placeholder fallback.
    desc = content or summary or '-'
    fg.description(desc)  # required
    fg.title(title or util.ellipsize(desc))  # required

    latest = None
    enclosures = False
    for activity in activities:
        obj = activity.get('object') or activity
        # Skip actor-only entries; they are not feed items.
        if obj.get('objectType') == 'person':
            continue

        item = fg.add_entry()
        url = obj.get('url')
        item.id(obj.get('id') or url)
        item.link(href=url)
        item.guid(url, permalink=True)
        item.title(obj.get('title') or obj.get('displayName') or '-')  # required

        content = microformats2.render_content(
            obj, include_location=True,
            render_attachments=False) or obj.get('summary')
        if content:
            item.content(content, type='CDATA')

        # Tags become RSS categories; reaction/share verbs are filtered out.
        item.category(
            [{'term': t['displayName']} for t in obj.get('tags', [])
             if t.get('displayName') and
             t.get('verb') not in ('like', 'react', 'share')])

        author = obj.get('author', {})
        item.author({
            'name': author.get('displayName') or author.get('username'),
            'uri': author.get('url'),
        })

        published = obj.get('published') or obj.get('updated')
        if published:
            try:
                dt = mf2util.parse_datetime(published)
                # parse_datetime may return a bare date; promote to datetime.
                if not isinstance(dt, datetime):
                    dt = datetime.combine(dt, time.min)
                # feedgen requires timezone-aware datetimes; assume UTC.
                if not dt.tzinfo:
                    dt = dt.replace(tzinfo=util.UTC)
                item.published(dt)
                # Track the newest timestamp for lastBuildDate below.
                if not latest or dt > latest:
                    latest = dt
            except ValueError:  # bad datetime string
                pass

        for att in obj.get('attachments', []):
            stream = util.get_first(att, 'stream') or att
            if not stream:
                continue

            url = stream.get('url') or ''
            mime = mimetypes.guess_type(url)[0] or ''
            if (att.get('objectType') in ENCLOSURE_TYPES or
                    mime and mime.split('/')[0] in ENCLOSURE_TYPES):
                enclosures = True
                # 'REMOVEME' is a sentinel: feedgen requires a length, but
                # it's unknown here; the attribute is stripped from the
                # rendered XML at the bottom of this function.
                item.enclosure(url=url, type=mime,
                               length='REMOVEME')  # TODO: length (bytes)

                item.load_extension('podcast')
                duration = stream.get('duration')
                if duration:
                    item.podcast.itunes_duration(duration)

    if enclosures:
        fg.load_extension('podcast')
        fg.podcast.itunes_author(
            actor.get('displayName') or actor.get('username'))
        if summary:
            fg.podcast.itunes_summary(summary)
        fg.podcast.itunes_explicit('no')
        fg.podcast.itunes_block(False)

    if latest:
        fg.lastBuildDate(latest)

    # Strip the sentinel enclosure length before returning the XML.
    return fg.rss_str(pretty=True).decode('utf-8').replace(
        ' length="REMOVEME"', '')
def test_none():
    """parse_datetime tolerates a missing value and yields None."""
    assert mf2util.parse_datetime(None) is None