def test_autolink_simple():
    """Bare URLs in plain text are wrapped in ``auto-link`` anchors.

    Covers a URL with an explicit scheme, a scheme-less domain with a
    path, and a short domain with a two-letter TLD.
    """
    expectations = (
        ('This is a simple link to http://example.com',
         'This is a simple link to <a class="auto-link" href="http://example.com">http://example.com</a>'),
        ('A link without a schema jason.com/friday13th maybe?',
         'A link without a schema <a class="auto-link" href="http://jason.com/friday13th">jason.com/friday13th</a> maybe?'),
        ('Shortened link is.gd/me for ex.',
         'Shortened link <a class="auto-link" href="http://is.gd/me">is.gd/me</a> for ex.'),
    )
    for text, expected in expectations:
        rendered = util.autolink(text)
        assert rendered == expected
def test_autolink_simple():
    """Check ``util.autolink`` on three one-URL sentences.

    The expected markup shows that a missing scheme is filled in with
    ``http://`` in the ``href`` while the link text stays as typed.
    """
    samples = [
        ('This is a simple link to http://example.com',
         'This is a simple link to <a class="auto-link" href="http://example.com">http://example.com</a>'),
        ('A link without a schema jason.com/friday13th maybe?',
         'A link without a schema <a class="auto-link" href="http://jason.com/friday13th">jason.com/friday13th</a> maybe?'),
        ('Shortened link is.gd/me for ex.',
         'Shortened link <a class="auto-link" href="http://is.gd/me">is.gd/me</a> for ex.'),
    ]
    for sentence, wanted in samples:
        got = util.autolink(sentence)
        assert got == wanted
def guess_content(post):
    """Work out the message, link, name and picture to share for a post.

    The message is the post title (if any), a blank line, and the post
    content rendered to HTML and then flattened to text; for anything
    other than an article the post's shortlink is appended in
    parentheses.

    The remaining values depend on ``post.post_type``:

    * ``"share"``: ``link`` is the URL of the first repost context.
    * ``"article"``: ``name`` is the title and ``link`` the permalink.
    * ``"photo"`` with attachments: ``picture`` is the first
      attachment's URL.
    * anything else: ``link`` is the first ``href`` found in the rendered
      HTML that is non-empty and does not start with ``#``.

    Args:
        post: the post to describe; only read, never modified.

    Returns:
        A ``(message, link, name, picture)`` tuple; any of the last three
        may be ``None``.
    """
    name = None
    picture = None
    link = None

    message = ""
    if post.title:
        message += post.title + "\n\n"

    html = util.autolink(util.markdown_filter(post.content))
    message += util.format_as_text(html)

    if post.post_type != "article":
        message += " (" + post.shortlink + ")"

    if post.post_type == "share":
        link = next((s.url for s in post.repost_contexts), None)
    elif post.post_type == "article":
        name = post.title
        link = post.permalink
    elif post.post_type == "photo" and post.attachments:
        picture = post.attachments[0].url
    else:
        # first link becomes the target
        # Name the parser explicitly: without it bs4 picks whichever
        # parser happens to be installed and emits a warning.
        soup = BeautifulSoup(html, "html.parser")
        hrefs = (a.get("href") for a in soup.find_all("a"))
        # filter out hashtags
        link = next((h for h in hrefs if h and not h.startswith("#")), None)

    return message, link, name, picture
def test_parsing_hashtags():
    """Exercise the #-tag matching regex.

    Every case is ``(input, expected autolink output, expected tags)``;
    both ``util.find_hashtags`` and ``util.autolink`` are checked against
    the same input.
    """
    # Long filler text surrounding a single hashtag; the input and the
    # expected output differ only in the piece between head and tail.
    lorem_head = (
        'Lorem ipsum dolor sit amet, consectetur adipiscing elit. '
        'Praesent tincidunt aliquam sem, in tempus elit lacinia vel. '
        'Integer accumsan cursus purus et euismod. '
        'Nullam ultricies nunc sit amet ante consequat porta. '
        'Pellentesque et porta odio. '
        'Sed et neque cursus, iaculis lorem nec, laoreet odio. '
        'Donec molestie volutpat vestibulum. '
        'Curabitur rhoncus elit ut massa pretium luctus. ')
    lorem_tail = (
        ' sollicitudin ligula vitae tincidunt suscipit. '
        'Maecenas in neque porta, scelerisque metus at, mollis nunc. '
        'Fusce accumsan imperdiet velit, in tincidunt tellus aliquam ac. '
        'Nullam iaculis vel urna sed vulputate. '
        'Aliquam erat volutpat. '
        'Etiam et tortor turpis. '
        'Vivamus mattis enim lacus, in aliquet nulla blandit.')

    cases = (
        ('#hashtag should be linked',
         '<a href="/tags/hashtag">#hashtag</a> should be linked',
         ['hashtag']),
        ('hashtag should not be linked',
         'hashtag should not be linked',
         []),
        ('match #hashtags in the middle',
         'match <a href="/tags/hashtags">#hashtags</a> in the middle',
         ['hashtags']),
        ('match a tag at the #end',
         'match a tag at the <a href="/tags/end">#end</a>',
         ['end']),
        ('#1 should not be linked',
         '#1 should not be linked',
         []),
        ('#12345 should be linked',
         '<a href="/tags/12345">#12345</a> should be linked',
         ['12345']),
        ('#.foobar should not be linked',
         '#.foobar should not be linked',
         []),
        ('#foo.bar should be partially linked',
         '<a href="/tags/foo">#foo</a>.bar should be partially linked',
         ['foo']),
        ('capital letters in #HashTags will be lowercased',
         'capital letters in <a href="/tags/hashtags">#HashTags</a> will be lowercased',
         ['hashtags']),
        ('duplicate #hashtags should parse both #hashtags fine',
         'duplicate <a href="/tags/hashtags">#hashtags</a> should parse both <a href="/tags/hashtags">#hashtags</a> fine',
         ['hashtags', 'hashtags']),
        (lorem_head + '#Nullam' + lorem_tail,
         lorem_head + '<a href="/tags/nullam">#Nullam</a>' + lorem_tail,
         ['nullam']),
        ('this hash#tag will not be parsed',
         'this hash#tag will not be parsed',
         []),
        ('http://example.com/path#fragment',
         '<a class="auto-link" href="http://example.com/path#fragment">http://example.com/path#fragment</a>',
         []),
    )

    for text, expected_html, expected_tags in cases:
        found_tags = util.find_hashtags(text)
        rendered = util.autolink(text)
        assert expected_html == rendered
        assert expected_tags == found_tags
def test_autolink_urls():
    """Exercise the URL matching regex.

    Each pair is ``(input, expected autolink output)``. The cases cover a
    short domain that is linked, a dotted word that is not, a link that
    must stop before a comma, a query string that must be kept whole, and
    existing anchors and code blocks that must pass through unchanged.
    """
    cases = (
        ('this should be link.gy',
         'this should be <a class="auto-link" href="http://link.gy">link.gy</a>'),
        ('this should not be link.linked',
         'this should not be link.linked'),
        ('a link to is.gd/supplies, should end at the comma',
         'a link to <a class="auto-link" href="http://is.gd/supplies">is.gd/supplies</a>, should end at the comma'),
        ('A link to example.com/q?u=a75$qrst&v should not terminate early',
         'A link to <a class="auto-link" href="http://example.com/q?u=a75$qrst&v">example.com/q?u=a75$qrst&v</a> should not terminate early'),
        ('HTML links <a href="http://google.com">google.com</a> should not be affected',
         'HTML links <a href="http://google.com">google.com</a> should not be affected'),
        ('Neither should <code><pre>http://fenced.code/blocks</pre></code>',
         'Neither should <code><pre>http://fenced.code/blocks</pre></code>'),
    )
    for text, expected in cases:
        rendered = util.autolink(text)
        assert expected == rendered
def test_parsing_hashtags():
    """Exercise the #-tag matching regex.

    The table rows are ``(input text, expected linked HTML, expected tag
    list)``. For each row the tags are extracted first and the text is
    autolinked second, then both results are compared to the row.
    """
    # The long paragraph appears twice (input and expected output) and
    # differs only around the one hashtag, so it is assembled from parts.
    before_tag = (
        'Lorem ipsum dolor sit amet, consectetur adipiscing elit. '
        'Praesent tincidunt aliquam sem, in tempus elit lacinia vel. '
        'Integer accumsan cursus purus et euismod. '
        'Nullam ultricies nunc sit amet ante consequat porta. '
        'Pellentesque et porta odio. '
        'Sed et neque cursus, iaculis lorem nec, laoreet odio. '
        'Donec molestie volutpat vestibulum. '
        'Curabitur rhoncus elit ut massa pretium luctus. ')
    after_tag = (
        ' sollicitudin ligula vitae tincidunt suscipit. '
        'Maecenas in neque porta, scelerisque metus at, mollis nunc. '
        'Fusce accumsan imperdiet velit, in tincidunt tellus aliquam ac. '
        'Nullam iaculis vel urna sed vulputate. '
        'Aliquam erat volutpat. '
        'Etiam et tortor turpis. '
        'Vivamus mattis enim lacus, in aliquet nulla blandit.')
    long_input = before_tag + '#Nullam' + after_tag
    long_output = before_tag + '<a href="/tags/nullam">#Nullam</a>' + after_tag

    table = [
        ('#hashtag should be linked',
         '<a href="/tags/hashtag">#hashtag</a> should be linked',
         ['hashtag']),
        ('hashtag should not be linked',
         'hashtag should not be linked',
         []),
        ('match #hashtags in the middle',
         'match <a href="/tags/hashtags">#hashtags</a> in the middle',
         ['hashtags']),
        ('match a tag at the #end',
         'match a tag at the <a href="/tags/end">#end</a>',
         ['end']),
        ('#1 should not be linked',
         '#1 should not be linked',
         []),
        ('#12345 should be linked',
         '<a href="/tags/12345">#12345</a> should be linked',
         ['12345']),
        ('#.foobar should not be linked',
         '#.foobar should not be linked',
         []),
        ('#foo.bar should be partially linked',
         '<a href="/tags/foo">#foo</a>.bar should be partially linked',
         ['foo']),
        ('capital letters in #HashTags will be lowercased',
         'capital letters in <a href="/tags/hashtags">#HashTags</a> will be lowercased',
         ['hashtags']),
        ('duplicate #hashtags should parse both #hashtags fine',
         'duplicate <a href="/tags/hashtags">#hashtags</a> should parse both <a href="/tags/hashtags">#hashtags</a> fine',
         ['hashtags', 'hashtags']),
        (long_input, long_output, ['nullam']),
        ('this hash#tag will not be parsed',
         'this hash#tag will not be parsed',
         []),
        ('http://example.com/path#fragment',
         '<a class="auto-link" href="http://example.com/path#fragment">http://example.com/path#fragment</a>',
         []),
    ]

    for source, wanted_html, wanted_tags in table:
        tags_found = util.find_hashtags(source)
        html_out = util.autolink(source)
        assert wanted_html == html_out
        assert wanted_tags == tags_found
def test_autolink_urls():
    """Exercise the URL matching regex.

    A stand-in ``url_processor`` wraps each detected URL in angle
    brackets, so the expectations show exactly which span was matched
    without depending on the real anchor markup. Person processing is
    disabled by passing ``person_processor=None``.
    """
    def simple_url_marker(url, soup):
        # ``soup`` is part of the processor signature but is not needed
        # to produce the marker.
        return '<' + url + '>'

    cases = (
        ('this should be link.ed',
         'this should be <http://link.ed>'),
        ('this should not be link.linked',
         'this should not be link.linked'),
        ('a link to is.gd/supplies, should end at the comma',
         'a link to <http://is.gd/supplies>, should end at the comma'),
        ('A link to example.com/q?u=a75$qrst&v should not terminate early',
         'A link to <http://example.com/q?u=a75$qrst&v> should not terminate early'),
        ('HTML links <a href="http://google.com">google.com</a> should not be affected',
         'HTML links <a href="http://google.com">google.com</a> should not be affected'),
        ('Neither should <code><pre>http://fenced.code/blocks</pre></code>',
         'Neither should <code><pre>http://fenced.code/blocks</pre></code>'),
    )
    for text, expected in cases:
        rendered = util.autolink(
            text, person_processor=None, url_processor=simple_url_marker)
        assert expected == rendered
def test_autolink_urls():
    """Exercise the URL matching regex.

    Rows are ``(input, expected output of util.autolink)``; rows whose
    two halves are equal assert that the text is left alone.
    """
    table = [
        ('this should be link.gy',
         'this should be <a class="auto-link" href="http://link.gy">link.gy</a>'),
        ('this should not be link.linked',
         'this should not be link.linked'),
        ('a link to is.gd/supplies, should end at the comma',
         'a link to <a class="auto-link" href="http://is.gd/supplies">is.gd/supplies</a>, should end at the comma'),
        ('A link to example.com/q?u=a75$qrst&v should not terminate early',
         'A link to <a class="auto-link" href="http://example.com/q?u=a75$qrst&v">example.com/q?u=a75$qrst&v</a> should not terminate early'),
        ('HTML links <a href="http://google.com">google.com</a> should not be affected',
         'HTML links <a href="http://google.com">google.com</a> should not be affected'),
        ('Neither should <code><pre>http://fenced.code/blocks</pre></code>',
         'Neither should <code><pre>http://fenced.code/blocks</pre></code>'),
    ]
    for source, wanted in table:
        got = util.autolink(source)
        assert wanted == got
def test_autolink_people(db):
    """Exercise the @-name matching regex, without contacts.

    A stand-in ``person_processor`` wraps each matched name in angle
    brackets so the expectations show exactly which names were picked up.
    URL processing is disabled by passing ``url_processor=None``.

    Args:
        db: database fixture; not referenced directly in the test body.
    """
    def simple_name_marker(contact, name, soup):
        # Only the matched name is needed to build the marker.
        return '<' + name + '>'

    test_cases = [
        ('@han should be linked', '<han> should be linked'),
        # An e-mail address contains '@' but is not an @-name. This case
        # previously held the e-mail-obfuscation placeholder
        # "[email protected]", which has no '@' and so tested nothing.
        # NOTE(review): the original address is unknown; example.com is a
        # stand-in.
        ('han@example.com should not be', 'han@example.com should not be'),
        ('@leia @luke @han', '<leia> <luke> <han>'),
        ('@leia@luke@han', '@leia@luke@han'),
        ('match a name at the end @kylewm',
         'match a name at the end <kylewm>'),
        ('match a name followed by a period @kylewm.',
         'match a name followed by a period <kylewm>.'),
        ('followed by a @comma, right?', 'followed by a <comma>, right?'),
    ]

    for inp, out in test_cases:
        assert out == util.autolink(
            inp, person_processor=simple_name_marker, url_processor=None)
def guess_content(post):
    """Derive ``(message, link, name, picture)`` for sharing a post.

    ``message`` is built from the optional title followed by a blank
    line, then the content rendered through ``util.markdown_filter`` and
    ``util.autolink`` and converted with ``util.format_as_text``. Posts
    that are not articles also get their shortlink appended in
    parentheses.

    ``link``, ``name`` and ``picture`` default to ``None`` and are filled
    in according to ``post.post_type``:

    * ``'share'``: ``link`` is the first repost context's URL, if any.
    * ``'article'``: ``name`` is the title, ``link`` the permalink.
    * ``'photo'`` with at least one attachment: ``picture`` is the first
      attachment's URL.
    * otherwise: ``link`` is the first anchor ``href`` in the rendered
      HTML that is non-empty and does not begin with ``#``.

    Args:
        post: the post being shared; it is only read.

    Returns:
        The 4-tuple ``(message, link, name, picture)``.
    """
    name = None
    picture = None
    link = None

    message = ''
    if post.title:
        message += post.title + '\n\n'

    html = util.autolink(util.markdown_filter(post.content))
    message += util.format_as_text(html)

    if post.post_type != 'article':
        message += ' (' + post.shortlink + ')'

    if post.post_type == 'share':
        link = next((s.url for s in post.repost_contexts), None)
    elif post.post_type == 'article':
        name = post.title
        link = post.permalink
    elif post.post_type == 'photo' and post.attachments:
        picture = post.attachments[0].url
    else:
        # first link becomes the target
        # Pin the parser: leaving it out makes bs4 choose whichever
        # parser is installed and emit a warning about the guess.
        soup = BeautifulSoup(html, 'html.parser')
        hrefs = (a.get('href') for a in soup.find_all('a'))
        # filter out hashtags
        link = next((h for h in hrefs if h and not h.startswith('#')), None)

    return message, link, name, picture
def save_post(post):
    """Populate ``post`` from the submitted form, persist it, and redirect.

    Values are read from ``request.form`` and uploaded files from
    ``request.files``. The function updates the post's dates, text
    fields, venue, location, context URLs, tags, people, slug, path,
    short path and attachments, pre-renders ``content_html``, commits via
    ``db.session``, fires the ``'post-saved'`` hook and returns a
    redirect to the post's permalink.

    Args:
        post: the post object to update; it is modified in place and
            added to the session when it has no ``id`` yet.

    Returns:
        The result of ``redirect(post.permalink)``.
    """
    # Remember the draft state before the form overwrites it; leaving
    # draft status refreshes the publish date, slug and path below.
    was_draft = post.draft

    pub_str = request.form.get('published')
    if pub_str:
        post.published = mf2util.parse_dt(pub_str)
        # Guard against an unparseable value the same way the start/end
        # handling below does; a missing date falls through to "now".
        if post.published and post.published.tzinfo:
            # store as naive UTC
            post.published = post.published.astimezone(
                datetime.timezone.utc).replace(tzinfo=None)

    if 'post_type' in request.form:
        post.post_type = request.form.get('post_type')

    start_str = request.form.get('start')
    if start_str:
        start = mf2util.parse_dt(start_str)
        if start:
            post.start = start
            post.start_utcoffset = start.utcoffset()

    end_str = request.form.get('end')
    if end_str:
        end = mf2util.parse_dt(end_str)
        if end:
            post.end = end
            post.end_utcoffset = end.utcoffset()

    now = datetime.datetime.utcnow()
    if not post.published or was_draft:
        post.published = now
    post.updated = now

    # populate the Post object and save it to the database,
    # redirect to the view
    post.title = request.form.get('title', '')
    post.content = request.form.get('content')
    post.draft = request.form.get('action') == 'save_draft'
    post.hidden = request.form.get('hidden', 'false') == 'true'
    post.friends_only = request.form.get('friends_only', 'false') == 'true'

    venue_name = request.form.get('new_venue_name')
    venue_lat = request.form.get('new_venue_latitude')
    venue_lng = request.form.get('new_venue_longitude')
    if venue_name and venue_lat and venue_lng:
        # A complete new-venue triple creates and commits a venue first.
        venue = Venue()
        venue.name = venue_name
        venue.location = {
            'latitude': float(venue_lat),
            'longitude': float(venue_lng),
        }
        venue.update_slug('{}-{}'.format(venue_lat, venue_lng))
        db.session.add(venue)
        db.session.commit()
        hooks.fire('venue-saved', venue, request.form)
        post.venue = venue
    else:
        venue_id = request.form.get('venue')
        if venue_id:
            post.venue = Venue.query.get(venue_id)

    lat = request.form.get('latitude')
    lon = request.form.get('longitude')
    if lat and lon:
        if post.location is None:
            post.location = {}
        post.location['latitude'] = float(lat)
        post.location['longitude'] = float(lon)
        loc_name = request.form.get('location_name')
        if loc_name is not None:
            post.location['name'] = loc_name
    else:
        post.location = None

    for url_attr, context_attr in (('in_reply_to', 'reply_contexts'),
                                   ('repost_of', 'repost_contexts'),
                                   ('like_of', 'like_contexts'),
                                   ('bookmark_of', 'bookmark_contexts')):
        url_str = request.form.get(url_attr)
        if url_str is not None:
            urls = util.multiline_string_to_list(url_str)
            setattr(post, url_attr, urls)

    # fetch contexts before generating a slug
    contexts.fetch_contexts(post)

    if 'item-name' in request.form:
        post.item = util.trim_nulls({
            'name': request.form.get('item-name'),
            'author': request.form.get('item-author'),
            'photo': request.form.get('item-photo'),
        })
    if 'rating' in request.form:
        rating = request.form.get('rating')
        post.rating = int(rating) if rating else None

    syndication = request.form.get('syndication')
    if syndication is not None:
        post.syndication = util.multiline_string_to_list(syndication)

    audience = request.form.get('audience')
    if audience is not None:
        post.audience = util.multiline_string_to_list(audience)

    tags = request.form.getlist('tags')
    if post.post_type != 'article' and post.content:
        # parse out hashtags as tag links from note-like posts
        tags += util.find_hashtags(post.content)

    tags = list(filter(None, map(util.normalize_tag, tags)))
    # Reuse an existing Tag row when one matches, otherwise create one.
    post.tags = [Tag.query.filter_by(name=tag).first() or Tag(tag)
                 for tag in tags]

    post.people = []
    people = request.form.getlist('people')
    for person in people:
        nick = Nick.query.filter_by(name=person).first()
        if nick:
            post.people.append(nick.contact)

    slug = request.form.get('slug')
    if slug:
        post.slug = util.slugify(slug)
    elif not post.slug or was_draft:
        post.slug = post.generate_slug()

    # events should use their start date for permalinks
    path_date = post.start or post.published

    if post.draft:
        # Drafts live under a hash of date and slug instead of a dated path.
        m = hashlib.md5()
        m.update(bytes(path_date.isoformat() + '|' + post.slug, 'utf-8'))
        post.path = 'drafts/{}'.format(m.hexdigest())
    elif not post.path or was_draft:
        base_path = '{}/{:02d}/{}'.format(
            path_date.year, path_date.month, post.slug)
        # generate a unique path
        unique_path = base_path
        idx = 1
        while Post.load_by_path(unique_path):
            unique_path = '{}-{}'.format(base_path, idx)
            idx += 1
        post.path = unique_path

    # generate short path
    if not post.short_path:
        short_base = '{}/{}'.format(
            util.tag_for_post_type(post.post_type),
            util.base60_encode(util.date_to_ordinal(path_date)))
        short_paths = set(
            row[0] for row in db.session.query(Post.short_path).filter(
                Post.short_path.startswith(short_base)).all())
        # Take the first numeric suffix not already used for this base.
        for idx in itertools.count(1):
            post.short_path = short_base + util.base60_encode(idx)
            if post.short_path not in short_paths:
                break

    infiles = request.files.getlist('files') + request.files.getlist('photo')
    current_app.logger.debug('infiles: %s', infiles)
    for infile in infiles:
        if infile and infile.filename:
            current_app.logger.debug('receiving uploaded file %s', infile)
            attachment = create_attachment_from_file(post, infile)
            os.makedirs(os.path.dirname(attachment.disk_path), exist_ok=True)
            infile.save(attachment.disk_path)
            post.attachments.append(attachment)

    photo_url = request.form.get('photo')
    if photo_url:
        current_app.logger.debug('downloading photo from url %s', photo_url)
        # NOTE(review): photo_url comes straight from the form and is
        # fetched server-side; confirm that callers are trusted.
        temp_filename, headers = urllib.request.urlretrieve(photo_url)
        try:
            content_type = headers.get('content-type', '')
            mimetype = content_type and content_type.split(';')[0].strip()
            filename = os.path.basename(urllib.parse.urlparse(photo_url).path)
            attachment = create_attachment(post, filename, mimetype)
            os.makedirs(os.path.dirname(attachment.disk_path), exist_ok=True)
            shutil.copyfile(temp_filename, attachment.disk_path)
        finally:
            # Remove the temporary download even if the copy fails.
            urllib.request.urlcleanup()
        post.attachments.append(attachment)

    # pre-render the post html
    html = util.markdown_filter(post.content, img_path=post.get_image_path())
    html = util.autolink(html)
    if post.post_type == 'article':
        html = util.process_people_to_microcards(html)
    else:
        html = util.process_people_to_at_names(html)
    post.content_html = html

    if not post.id:
        db.session.add(post)
    db.session.commit()

    current_app.logger.debug('saved post %d %s', post.id, post.permalink)
    redirect_url = post.permalink

    hooks.fire('post-saved', post, request.form)
    return redirect(redirect_url)
def test_autolink_trailing_slash():
    """A URL ending in ``/`` keeps the slash in both href and link text."""
    expected = '<a class="auto-link" href="http://hel.ly/world/">http://hel.ly/world/</a>'
    rendered = util.autolink('http://hel.ly/world/')
    assert rendered == expected
def test_autolink_word_boundaries():
    """A dotted phrase such as ``common.language`` must not produce a link."""
    rendered = util.autolink('common.language runtime')
    contains_anchor = '<a' in rendered
    assert not contains_anchor
def test_autolink_no_consecutive_periods():
    """Words joined by a run of periods must not produce a link."""
    rendered = util.autolink('and....now for something completely different')
    contains_anchor = '<a' in rendered
    assert not contains_anchor
def test_autolink_at_names(contacts, mocker):
    """@-names in a sentence are replaced by person links.

    In the expected markup ``@luke`` and ``@leia`` become ``h-card``
    microcards with a photo and full name, while ``@obiwan`` becomes a
    plain Twitter profile link. Presumably the difference comes from
    which names the ``contacts`` fixture defines (not visible here).

    Args:
        contacts: fixture supplying contact data; not referenced directly.
        mocker: fixture; not referenced directly in the test body.
    """
    source = "@luke this is @leia tell @obiwan he\'s our only help!"
    expected = """<a class="microcard h-card" href="http://tatooine.com/moseisley"><img src="http://tatooine.com/luke.jpg"/>Luke Skywalker</a> this is <a class="microcard h-card" href="http://aldera.an"><img src="http://aldera.an/leia.png"/>Princess Leia</a> tell <a href="https://twitter.com/obiwan">@obiwan</a> he's our only help!"""
    rendered = util.autolink(source)
    assert rendered == expected
def save_post(post):
    """Populate ``post`` from the submitted form, persist it, and redirect.

    Values are read from ``request.form`` and uploaded files from
    ``request.files``. The post's dates, text fields, venue, location,
    context URLs, tags, slug, path, short path and attachments are
    updated, ``content_html`` is pre-rendered, the session is committed,
    the ``'post-saved'`` hook is fired and a redirect to the post's
    permalink is returned.

    Args:
        post: the post object to update; modified in place and added to
            the session when it has no ``id`` yet.

    Returns:
        The result of ``redirect(post.permalink)``.
    """
    # Capture the draft state before the form overwrites it; leaving
    # draft status refreshes the publish date, slug and path below.
    was_draft = post.draft
    pub_str = request.form.get('published')
    if pub_str:
        post.published = mf2util.parse_dt(pub_str)

    start_str = request.form.get('start')
    if start_str:
        start = mf2util.parse_dt(start_str)
        if start:
            post.start = start
            post.start_utcoffset = start.utcoffset()

    end_str = request.form.get('end')
    if end_str:
        end = mf2util.parse_dt(end_str)
        if end:
            post.end = end
            post.end_utcoffset = end.utcoffset()

    now = datetime.datetime.utcnow()
    # A missing publish date, or a post that was a draft until now, is
    # stamped with the current time.
    if not post.published or was_draft:
        post.published = now
    post.updated = now

    # populate the Post object and save it to the database,
    # redirect to the view
    post.title = request.form.get('title', '')
    post.content = request.form.get('content')
    post.draft = request.form.get('action') == 'save_draft'
    post.hidden = request.form.get('hidden', 'false') == 'true'

    venue_name = request.form.get('new_venue_name')
    venue_lat = request.form.get('new_venue_latitude')
    venue_lng = request.form.get('new_venue_longitude')
    if venue_name and venue_lat and venue_lng:
        # A complete new-venue triple creates and commits a venue first.
        venue = Venue()
        venue.name = venue_name
        venue.location = {
            'latitude': float(venue_lat),
            'longitude': float(venue_lng),
        }
        venue.update_slug('{}-{}'.format(venue_lat, venue_lng))
        db.session.add(venue)
        db.session.commit()
        hooks.fire('venue-saved', venue, request.form)
        post.venue = venue
    else:
        # Otherwise attach an existing venue when an id was submitted.
        venue_id = request.form.get('venue')
        if venue_id:
            post.venue = Venue.query.get(venue_id)

    lat = request.form.get('latitude')
    lon = request.form.get('longitude')
    if lat and lon:
        if post.location is None:
            post.location = {}
        post.location['latitude'] = float(lat)
        post.location['longitude'] = float(lon)
        loc_name = request.form.get('location_name')
        if loc_name is not None:
            post.location['name'] = loc_name
    else:
        # Without both coordinates any existing location is cleared.
        post.location = None

    # Only the first element of each pair is used in the loop body.
    for url_attr, context_attr in (('in_reply_to', 'reply_contexts'),
                                   ('repost_of', 'repost_contexts'),
                                   ('like_of', 'like_contexts'),
                                   ('bookmark_of', 'bookmark_contexts')):
        url_str = request.form.get(url_attr)
        if url_str is not None:
            urls = util.multiline_string_to_list(url_str)
            setattr(post, url_attr, urls)

    # fetch contexts before generating a slug
    contexts.fetch_contexts(post)

    syndication = request.form.get('syndication')
    if syndication is not None:
        post.syndication = util.multiline_string_to_list(syndication)

    audience = request.form.get('audience')
    if audience is not None:
        post.audience = util.multiline_string_to_list(audience)

    tags = request.form.getlist('tags')
    if post.post_type != 'article' and post.content:
        # parse out hashtags as tag links from note-like posts
        tags += util.find_hashtags(post.content)

    # Normalize every tag and drop the ones that normalize to nothing.
    tags = list(filter(None, map(util.normalize_tag, tags)))
    # Reuse an existing Tag row when one matches, otherwise create one.
    post.tags = [Tag.query.filter_by(name=tag).first() or Tag(tag)
                 for tag in tags]

    slug = request.form.get('slug')
    if slug:
        post.slug = util.slugify(slug)
    elif not post.slug or was_draft:
        post.slug = post.generate_slug()

    # events should use their start date for permalinks
    path_date = post.start or post.published

    if post.draft:
        # Drafts live under a hash of date and slug instead of a dated path.
        m = hashlib.md5()
        m.update(bytes(path_date.isoformat() + '|' + post.slug, 'utf-8'))
        post.path = 'drafts/{}'.format(m.hexdigest())
    elif not post.path or was_draft:
        base_path = '{}/{:02d}/{}'.format(
            path_date.year, path_date.month, post.slug)
        # generate a unique path
        unique_path = base_path
        idx = 1
        while Post.load_by_path(unique_path):
            unique_path = '{}-{}'.format(base_path, idx)
            idx += 1
        post.path = unique_path

    # generate short path
    if not post.short_path:
        short_base = '{}/{}'.format(
            util.tag_for_post_type(post.post_type),
            util.base60_encode(util.date_to_ordinal(path_date)))
        short_paths = set(
            row[0] for row in db.session.query(Post.short_path).filter(
                Post.short_path.startswith(short_base)).all())
        # Take the first numeric suffix not already used for this base.
        for idx in itertools.count(1):
            post.short_path = short_base + util.base60_encode(idx)
            if post.short_path not in short_paths:
                break

    infiles = request.files.getlist('files') + request.files.getlist('photo')
    current_app.logger.debug('infiles: %s', infiles)
    for infile in infiles:
        # Skip empty upload slots (no file object or no filename).
        if infile and infile.filename:
            current_app.logger.debug('receiving uploaded file %s', infile)
            attachment = create_attachment_from_file(post, infile)
            os.makedirs(os.path.dirname(attachment.disk_path), exist_ok=True)
            infile.save(attachment.disk_path)
            post.attachments.append(attachment)

    # pre-render the post html
    html = util.markdown_filter(post.content, img_path=post.get_image_path())
    html = util.autolink(html)
    if post.post_type == 'article':
        html = util.process_people_to_microcards(html)
    else:
        html = util.process_people_to_at_names(html)
    post.content_html = html

    # Only posts without an id need to be added to the session.
    if not post.id:
        db.session.add(post)
    db.session.commit()

    current_app.logger.debug('saved post %d %s', post.id, post.permalink)
    redirect_url = post.permalink

    hooks.fire('post-saved', post, request.form)
    return redirect(redirect_url)