def test_sanitize_url_with_long_after_space_quote_url(self):
    """A URL containing a space, padded up to just under the limit,
    must still end up truncated once the space is percent-escaped."""
    base = "http://m m"
    candidate_url = base + "u" * (URL_MAX_LENGTH - len(base) - 1)
    self.assertTrue(len(candidate_url) < URL_MAX_LENGTH)
    result_url, was_truncated = sanitize_url(candidate_url)
    self.assertEqual(True, was_truncated)
    self.assertGreaterEqual(URL_MAX_LENGTH, len(result_url))
def test_sanitize_url_with_long_after_space_quote_url(self):
    """Escaping the space of a nearly max-length URL pushes it over the
    limit, so sanitize_url must report a truncation."""
    url = "http://m m"
    padding = "u" * (URL_MAX_LENGTH - len(url) - 1)
    url += padding
    self.assertTrue(len(url) < URL_MAX_LENGTH)
    sanitized, truncated = sanitize_url(url)
    self.assertEqual(True, truncated)
    self.assertGreaterEqual(URL_MAX_LENGTH, len(sanitized))
def test_sanitize_url_on_long_ascii_url_with_campain_args(self):
    """A too-long URL ending in utm_* campaign arguments should be cut
    exactly at the start of the campaign string."""
    separator = "/?"
    campain_args = "utm_source=rss&utm_medium=rss&utm_campaign=on-peut"
    padding_length = (URL_MAX_LENGTH - 7 - len(campain_args)
                      - len(separator) + 1)
    full_url = "".join(["http://", "u" * padding_length,
                        separator, campain_args])
    sanitized_url, was_truncated = sanitize_url(full_url)
    self.assertTrue(was_truncated)
    self.assertGreaterEqual(URL_MAX_LENGTH, len(sanitized_url))
    self.assertEqual(full_url[:-len(campain_args)], sanitized_url)
def save(self):
    """Create (or reuse) the web feed matching the form's data, pin its
    source and subscribe the user to it.

    Warning: the source will be saved as well as the related objects
    (no commit options).

    Returns the WebFeed instance, or None if the user is already
    subscribed to a feed with the same source URL.
    """
    form_url, _ = sanitize_url(self.cleaned_data["url"])
    form_title = self.cleaned_data["title"]
    form_feed_url, _ = sanitize_url(self.cleaned_data["feed_url"])
    if self.user.userprofile.web_feeds.filter(
            source__url=form_url).exists():
        # nothing to do
        return
    # try a bigger look-up anyway
    same_sources = WebFeed.objects.filter(source__url=form_url).all()
    # url are unique for sources
    if same_sources:
        new_feed = same_sources[0]
        # BUGFIX: source_ref was left unbound on this branch although it
        # is used below in the transaction; reuse the existing feed's
        # source reference.
        source_ref = new_feed.source
    else:
        if form_title:
            source_title = form_title
        else:
            source_title = build_reference_title_from_url(form_url)
        try:
            source_ref = Reference.objects.get(url=form_url)
        except ObjectDoesNotExist:
            source_ref = Reference(url=form_url, title=source_title,
                                   pub_date=datetime.now(timezone.utc))
            source_ref.save()
        new_feed = WebFeed(source=source_ref)
        # assume that either form_feed_url or form_url have been
        # validated as a valid feed url
        new_feed.xmlURL = form_feed_url or form_url
        # Epoch used as a "never checked yet" marker; fromtimestamp with
        # an explicit tz replaces the deprecated utcfromtimestamp+replace
        # combination and yields the exact same aware datetime.
        new_feed.last_update_check = datetime.fromtimestamp(
            0, tz=timezone.utc)
        new_feed.save()
    with transaction.atomic():
        source_ref.add_pin()
        source_ref.save()
        self.user.userprofile.sources.add(source_ref)
        self.user.userprofile.public_sources.add(source_ref)
        self.user.userprofile.web_feeds.add(new_feed)
        self.user.userprofile.save()
    return new_feed
def test_sanitize_url_on_long_ascii_url_with_campain_args(self):
    """Truncation of an over-long URL must drop exactly the trailing
    utm_* campaign arguments."""
    query_prefix = "/?"
    campain = "utm_source=rss&utm_medium=rss&utm_campaign=on-peut"
    filler = "u" * (URL_MAX_LENGTH - 7 - len(campain)
                    - len(query_prefix) + 1)
    url = "http://" + filler + query_prefix + campain
    shortened, truncated = sanitize_url(url)
    self.assertTrue(truncated)
    self.assertGreaterEqual(URL_MAX_LENGTH, len(shortened))
    self.assertEqual(url[:-len(campain)], shortened)
def save(self):
    """Create (or reuse) the web feed matching the form's data and
    subscribe the user to it.

    Warning: the source will be saved as well as the related objects
    (no commit options).

    Returns the WebFeed instance, or None if the user is already
    subscribed to a feed with the same source URL.
    """
    form_url, _ = sanitize_url(self.cleaned_data["url"])
    form_title = self.cleaned_data["title"]
    form_feed_url, _ = sanitize_url(self.cleaned_data["feed_url"])
    if self.user.userprofile.web_feeds.filter(
            source__url=form_url).exists():
        # nothing to do
        return
    # try a bigger look-up anyway
    same_sources = WebFeed.objects.filter(source__url=form_url).all()
    # url are unique for sources
    if same_sources:
        new_feed = same_sources[0]
        # BUGFIX: source_ref was left unbound on this branch although it
        # is used below; reuse the existing feed's source reference.
        source_ref = new_feed.source
    else:
        if form_title:
            source_title = form_title
        else:
            source_title = build_reference_title_from_url(form_url)
        try:
            source_ref = Reference.objects.get(url=form_url)
        except ObjectDoesNotExist:
            source_ref = Reference(url=form_url, title=source_title,
                                   pub_date=datetime.now(timezone.utc))
            source_ref.save()
        new_feed = WebFeed(source=source_ref)
        # assume that either form_feed_url or form_url have been
        # validated as a valid feed url
        new_feed.xmlURL = form_feed_url or form_url
        # Epoch used as a "never checked yet" marker; fromtimestamp with
        # an explicit tz replaces the deprecated utcfromtimestamp+replace
        # combination and yields the exact same aware datetime.
        new_feed.last_update_check = datetime.fromtimestamp(
            0, tz=timezone.utc)
        new_feed.save()
    self.user.userprofile.sources.add(source_ref)
    self.user.userprofile.public_sources.add(source_ref)
    self.user.userprofile.web_feeds.add(new_feed)
    return new_feed
def create_reference_from_feedparser_entry(entry, date, previous_ref):
    """Build (or refresh) a Reference from a FeedParser entry, stamping
    it with the publication date given in argument.

    If the corresponding Reference already exists it must be given as
    the previous_ref argument; when previous_ref is None it is assumed
    that there is no matching Rerefence in the db.

    Note: Enforce Dave Winer's recommendation for linkblog:
    http://scripting.com/2014/04/07/howToDisplayTitlelessFeedItems.html
    with a little twist: a title-less item gets its (possibly
    truncated) description as a title, falling back to the link itself
    when there is no description either.  The description of the
    reference is set in every case, even when it doubles as the title.

    Return a tuple with the unsaved reference and a list of tag names.
    """
    link = entry.link
    extra_info = ""
    tag_names = set()
    if entry.get("tags", None):
        tag_names = {t.term for t in entry.tags}
    if previous_ref is None:
        shortened_link, was_truncated = sanitize_url(link)
        if was_truncated:
            # Save the full url in info to limit the loss of information
            extra_info = u"<WOM had to truncate the following URL: %s>" % link
            logger.warning("Found an url of length %d (>%d) \
when importing references from feed." % (len(link), URL_MAX_LENGTH))
            link = shortened_link
        # set the title only for new ref (should avoid weird behaviour
        # from the user point of view)
        new_title = truncate_reference_title(
            HTMLUnescape(entry.get("title")
                         or strip_tags(entry.get("description"))
                         or link))
        ref = Reference(url=link, title=new_title)
    else:
        ref = previous_ref
    ref.description = " ".join((extra_info, entry.get("description", "")))
    ref.pub_date = date
    return (ref, tag_names)
def create_reference_from_feedparser_entry(entry, date, previous_ref):
    """Create a Reference out of a FeedParser entry, attributing it the
    publication date given in argument.

    If the corresponding Reference already exists, it must be given as
    the previous_ref argument, and if previous_ref is None, it will be
    assumed that there is no matching Rerefence in the db.

    Note: Enforce Dave Winer's recommendation for linkblog:
    http://scripting.com/2014/04/07/howToDisplayTitlelessFeedItems.html
    with a little twist: if a feed item has no title we will use the
    (possibly truncated) description as a title and if there is no
    description the link will be used.  In any case the description of
    a reference is set even if this description is also used for the
    title.

    Return a tuple with the unsaved reference and a list of tag names.
    """
    url = entry.link
    info = ""
    if entry.get("tags", None):
        tags = set(t.term for t in entry.tags)
    else:
        tags = set()
    if previous_ref is not None:
        ref = previous_ref
    else:
        url_truncated, did_truncate = sanitize_url(url)
        if did_truncate:
            # Save the full url in info to limit the loss of information
            info = u"<WOM had to truncate the following URL: %s>" % url
            logger.warning("Found an url of length %d (>%d) \
when importing references from feed." % (len(url), URL_MAX_LENGTH))
            url = url_truncated
        # set the title only for new ref (should avoid weird behaviour
        # from the user point of view)
        title = truncate_reference_title(
            HTMLUnescape(entry.get("title")
                         or strip_tags(entry.get("description"))
                         or url))
        ref = Reference(url=url, title=title)
    ref.description = " ".join((info, entry.get("description", "")))
    ref.pub_date = date
    return (ref, tags)
def create_reference_from_tweet_summary(
        summary, summary_url, title, date, previous_ref):
    """Build (or refresh) a Reference for a html summary of a tweet
    timeline, stamping it with the publication date given in argument.

    If the corresponding Reference already exists, it must be given as
    the previous_ref argument, and if previous_ref is None, it will be
    assumed that there is no matching Rerefence in the db.

    Return the unsaved reference.
    """
    if previous_ref is not None:
        ref = previous_ref
    else:
        _, was_truncated = sanitize_url(summary_url)
        # Summary urls are generated internally and are expected to
        # always fit within URL_MAX_LENGTH.
        assert not was_truncated
        ref = Reference(url=summary_url,
                        title=truncate_reference_title(title))
    ref.description = summary
    ref.pub_date = date
    return ref
def test_sanitize_url_on_long_ascii_url(self):
    """An over-long pure-ascii URL must be truncated below the limit."""
    oversized_url = "http://" + "u" * URL_MAX_LENGTH
    sanitized, was_truncated = sanitize_url(oversized_url)
    self.assertTrue(was_truncated)
    self.assertGreaterEqual(URL_MAX_LENGTH, len(sanitized))
def test_sanitize_url_with_spaces(self):
    """Spaces in a short URL are percent-escaped, without truncation."""
    url_with_spaces = u"http://m m"
    self.assertTrue(len(url_with_spaces) < URL_MAX_LENGTH)
    expected_url = "http://m%20m"
    self.assertEqual((expected_url, False), sanitize_url(url_with_spaces))
def test_sanitize_url_with_non_ascii_characters(self):
    """Non-ascii characters are percent-escaped as UTF-8 bytes, with no
    truncation for a short URL."""
    raw_url = "http://méhœñ۳予"
    self.assertTrue(len(raw_url) < URL_MAX_LENGTH)
    expected_url = "http://m%C3%A9h%C5%93%C3%B1%DB%B3%E4%BA%88"
    self.assertEqual((expected_url, False), sanitize_url(raw_url))
def test_sanitize_url_with_non_ascii_characters(self):
    """A short URL with non-ascii characters comes back percent-encoded
    and unmarked as truncated."""
    unicode_url = "http://méhœñ۳予"
    self.assertTrue(len(unicode_url) < URL_MAX_LENGTH)
    encoded = "http://m%C3%A9h%C5%93%C3%B1%DB%B3%E4%BA%88"
    self.assertEqual((encoded, False), sanitize_url(unicode_url))
def test_sanitize_url_on_long_ascii_url(self):
    """sanitize_url flags and shortens a URL longer than the limit."""
    padding = "u" * URL_MAX_LENGTH
    shortened, truncated = sanitize_url("http://" + padding)
    self.assertTrue(truncated)
    self.assertGreaterEqual(URL_MAX_LENGTH, len(shortened))
def test_sanitize_url_on_short_ascii_url(self):
    """A short plain-ascii URL passes through completely unchanged."""
    plain_url = "http://mouf"
    self.assertTrue(len(plain_url) < URL_MAX_LENGTH)
    self.assertEqual((plain_url, False), sanitize_url(plain_url))
def save(self):
    """Create (or update) the bookmark described by the form's data and
    attach it to the user's profile.

    Warning: the bookmark will be saved as well as the related objects
    (no commit options).

    Returns the bookmark.
    """
    url, _ = sanitize_url(self.cleaned_data["url"])
    title = self.cleaned_data["title"] \
            or build_reference_title_from_url(url)
    comment = self.cleaned_data["comment"]
    pub_date = self.cleaned_data["pub_date"] \
               or datetime.now(timezone.utc)
    src_url, _ = sanitize_url(self.cleaned_data["source_url"]
                              or build_source_url_from_reference_url(url))
    src_title = self.cleaned_data["source_title"] \
                or build_reference_title_from_url(src_url)
    # Find or create a matching reference
    try:
        bookmarked_ref = Reference.objects.get(url=url)
        # Arbitrarily chose one of the possible sources
        src_query = bookmarked_ref.sources
        if src_query.count() > 1:
            ref_src = src_query.all()[0]
        else:
            ref_src = src_query.get()
    except ObjectDoesNotExist:
        try:
            ref_src = Reference.objects.get(url=src_url)
        except ObjectDoesNotExist:
            ref_src = Reference(url=src_url, title=src_title,
                                pub_date=pub_date)
            ref_src.save()
        if src_url == url:
            bookmarked_ref = ref_src
        else:
            bookmarked_ref = Reference(url=url, title=title,
                                       pub_date=pub_date)
            bookmarked_ref.save()
            bookmarked_ref.sources.add(ref_src)
    # NOTE: transaction.commit_on_success() was deprecated in Django 1.6
    # and removed in 1.8; transaction.atomic() (already used elsewhere in
    # this module) is its direct replacement with the same
    # commit-on-success / rollback-on-exception behaviour.
    with transaction.atomic():
        try:
            bmk = UserBookmark.objects.get(owner=self.user,
                                           reference=bookmarked_ref)
        except ObjectDoesNotExist:
            bmk = UserBookmark(owner=self.user,
                               reference=bookmarked_ref,
                               saved_date=datetime.now(timezone.utc))
            # count the save only once, when the bookmark is created
            bookmarked_ref.save_count += 1
        bmk.save()
        bookmarked_ref.save()
    # allow the user-specific comment to be changed and also prefix
    # it with the user specified title if it differs from the
    # existing reference title.
    if comment:
        new_comment = comment
    else:
        new_comment = bmk.comment
    if self.cleaned_data["title"] and title != bookmarked_ref.title \
       and not new_comment.startswith(title):
        new_comment = "%s: %s" % (title, new_comment)
    if new_comment != bmk.comment:
        bmk.comment = new_comment
        bmk.save()
    with transaction.atomic():
        if ref_src not in self.user.userprofile.sources.all():
            self.user.userprofile.sources.add(ref_src)
    with transaction.atomic():
        # flag any pending user status for this reference as saved
        for rust in ReferenceUserStatus\
                .objects.filter(owner=self.user,
                                reference=bookmarked_ref).all():
            rust.has_been_saved = True
            rust.save()
    return bmk
def test_sanitize_url_on_short_ascii_url(self):
    """sanitize_url leaves a short ascii-only URL untouched."""
    url = "http://mouf"
    self.assertTrue(len(url) < URL_MAX_LENGTH)
    self.assertEqual((url, False), sanitize_url(url))
def save(self):
    """Save the bookmark built from the form's data.

    Warning: the bookmark will be saved as well as the related objects
    (no commit options).

    Returns the bookmark.
    """
    url, _ = sanitize_url(self.cleaned_data["url"])
    title = self.cleaned_data["title"] \
            or build_reference_title_from_url(url)
    comment = self.cleaned_data["comment"]
    pub_date = self.cleaned_data["pub_date"] \
               or datetime.now(timezone.utc)
    src_url, _ = sanitize_url(self.cleaned_data["source_url"]
                              or build_source_url_from_reference_url(url))
    src_title = self.cleaned_data["source_title"] \
                or build_reference_title_from_url(src_url)
    # Find or create a matching reference
    try:
        bookmarked_ref = Reference.objects.get(url=url)
        # Arbitrarily chose one of the possible sources
        src_query = bookmarked_ref.sources
        if src_query.count() > 1:
            ref_src = src_query.all()[0]
        else:
            ref_src = src_query.get()
    except ObjectDoesNotExist:
        try:
            ref_src = Reference.objects.get(url=src_url)
        except ObjectDoesNotExist:
            ref_src = Reference(url=src_url, title=src_title,
                                pub_date=pub_date)
            ref_src.save()
        if src_url == url:
            bookmarked_ref = ref_src
        else:
            bookmarked_ref = Reference(url=url, title=title,
                                       pub_date=pub_date)
            bookmarked_ref.save()
            bookmarked_ref.sources.add(ref_src)
    # NOTE: transaction.commit_on_success() was deprecated in Django 1.6
    # and removed in 1.8; transaction.atomic() (already used elsewhere in
    # this module) is the drop-in replacement.
    with transaction.atomic():
        try:
            bmk = UserBookmark.objects.get(owner=self.user,
                                           reference=bookmarked_ref)
        except ObjectDoesNotExist:
            bmk = UserBookmark(owner=self.user,
                               reference=bookmarked_ref,
                               saved_date=datetime.now(timezone.utc))
            # increment only when the bookmark is first created
            bookmarked_ref.save_count += 1
        bmk.save()
        bookmarked_ref.save()
    # allow the user-specific comment to be changed and also prefix
    # it with the user specified title if it differs from the
    # existing reference title.
    if comment:
        new_comment = comment
    else:
        new_comment = bmk.comment
    if self.cleaned_data["title"] and title != bookmarked_ref.title \
       and not new_comment.startswith(title):
        new_comment = "%s: %s" % (title, new_comment)
    if new_comment != bmk.comment:
        bmk.comment = new_comment
        bmk.save()
    with transaction.atomic():
        if ref_src not in self.user.userprofile.sources.all():
            self.user.userprofile.sources.add(ref_src)
    with transaction.atomic():
        # flag any pending user status for this reference as saved
        for rust in ReferenceUserStatus\
                .objects.filter(owner=self.user,
                                reference=bookmarked_ref).all():
            rust.has_been_saved = True
            rust.save()
    return bmk
def test_sanitize_url_with_spaces(self):
    """A short URL containing a space is returned with the space
    escaped to %20 and no truncation flag."""
    spaced = u"http://m m"
    self.assertTrue(len(spaced) < URL_MAX_LENGTH)
    self.assertEqual(("http://m%20m", False), sanitize_url(spaced))