Python sanitize_url Examples, wom_pebbles.tasks.sanitize_url Python Examples

Example #1

0

Show file

File: tests.py Project: rishabht1/wateronmars

 def test_sanitize_url_with_long_after_space_quote_url(self):
     """Check a URL one character under the limit that contains a space.

     The input itself is shorter than URL_MAX_LENGTH, yet sanitize_url
     is expected to report a truncation and to return a result that
     fits within the limit.
     """
     prefix = "http://m m"
     padding = "u" * (URL_MAX_LENGTH - len(prefix) - 1)
     short_url = prefix + padding
     # Sanity check: the raw input is still below the limit.
     self.assertLess(len(short_url), URL_MAX_LENGTH)
     truncated_url, did_truncate = sanitize_url(short_url)
     self.assertEqual(True, did_truncate)
     self.assertLessEqual(len(truncated_url), URL_MAX_LENGTH)

Example #2

0

Show file

File: tests.py Project: DarKnight24/wateronmars

 def test_sanitize_url_with_long_after_space_quote_url(self):
   """Check a URL one character under the limit that contains a space.

   The input itself is shorter than URL_MAX_LENGTH, yet sanitize_url
   is expected to report a truncation and to return a result that
   fits within the limit.
   """
   prefix = "http://m m"
   padding = "u" * (URL_MAX_LENGTH - len(prefix) - 1)
   short_url = prefix + padding
   # Sanity check: the raw input is still below the limit.
   self.assertLess(len(short_url), URL_MAX_LENGTH)
   truncated_url, did_truncate = sanitize_url(short_url)
   self.assertEqual(True, did_truncate)
   self.assertLessEqual(len(truncated_url), URL_MAX_LENGTH)

Example #3

0

Show file

File: tests.py Project: DarKnight24/wateronmars

 def test_sanitize_url_on_long_ascii_url_with_campain_args(self):
   """Check that an over-long URL loses its trailing campaign arguments.

   The expected result is the input minus the campaign string, with
   the "/?" query start kept.
   """
   scheme = "http://"
   query_start = "/?"
   campain_string = "utm_source=rss&utm_medium=rss&utm_campaign=on-peut"
   # The filler is sized so the complete URL is meant to be exactly one
   # character longer than URL_MAX_LENGTH.
   filler_size = (URL_MAX_LENGTH + 1
                  - len(scheme) - len(query_start) - len(campain_string))
   url_without_campain = scheme + ("u" * filler_size) + query_start
   long_url_campain = url_without_campain + campain_string
   res_long_url, did_truncate = sanitize_url(long_url_campain)
   self.assertTrue(did_truncate)
   self.assertLessEqual(len(res_long_url), URL_MAX_LENGTH)
   self.assertEqual(url_without_campain, res_long_url)

Example #4

0

Show file

File: forms.py Project: tibonihoo/wateronmars

 def save(self):
   """Subscribe the form's user to the web feed described by the form.

   Warning: the source will be saved as well as the related objects
   (no commit options).

   Returns the WebFeed the user has been subscribed to, or None when
   the user already follows a feed whose source has this URL.
   """
   form_url, _ = sanitize_url(self.cleaned_data["url"])
   form_title = self.cleaned_data["title"]
   form_feed_url, _ = sanitize_url(self.cleaned_data["feed_url"])
   if self.user.userprofile.web_feeds.filter(source__url=form_url).exists():
     # nothing to do
     return None
   # try a bigger look-up anyway: the feed may be known to other users
   same_sources = WebFeed.objects.filter(source__url=form_url).all()
   # url are unique for sources
   if same_sources:
     new_feed = same_sources[0]
     # The source of the existing feed is needed below to pin it and to
     # attach it to the user's profile; without this assignment the
     # name would be unbound on this path.
     source_ref = new_feed.source
   else:
     if form_title:
       source_title = form_title
     else:
       source_title = build_reference_title_from_url(form_url)
     try:
       source_ref = Reference.objects.get(url=form_url)
     except ObjectDoesNotExist:
       source_ref = Reference(url=form_url, title=source_title,
                              pub_date=datetime.now(timezone.utc))
       source_ref.save()
     new_feed = WebFeed(source=source_ref)
     # assume that either form_feed_url or form_url have been
     # validated as a valid feed url
     new_feed.xmlURL = form_feed_url or form_url
     # Start from the epoch as the date of the last update check.
     new_feed.last_update_check = (
       datetime.utcfromtimestamp(0).replace(tzinfo=timezone.utc))
     new_feed.save()
   # Pin the source and register source and feed on the user's profile
   # within a single transaction.
   with transaction.atomic():
     source_ref.add_pin()
     source_ref.save()
     self.user.userprofile.sources.add(source_ref)
     self.user.userprofile.public_sources.add(source_ref)
     self.user.userprofile.web_feeds.add(new_feed)
     self.user.userprofile.save()
   return new_feed

Example #5

0

Show file

File: tests.py Project: rishabht1/wateronmars

 def test_sanitize_url_on_long_ascii_url_with_campain_args(self):
     """Check that an over-long URL loses its trailing campaign arguments.

     The expected result is the input minus the campaign string, with
     the "/?" query start kept.
     """
     scheme = "http://"
     query_start = "/?"
     campain_string = "utm_source=rss&utm_medium=rss&utm_campaign=on-peut"
     # The filler is sized so the complete URL is meant to be exactly
     # one character longer than URL_MAX_LENGTH.
     filler_size = (URL_MAX_LENGTH + 1
                    - len(scheme) - len(query_start) - len(campain_string))
     url_without_campain = scheme + ("u" * filler_size) + query_start
     long_url_campain = url_without_campain + campain_string
     res_long_url, did_truncate = sanitize_url(long_url_campain)
     self.assertTrue(did_truncate)
     self.assertLessEqual(len(res_long_url), URL_MAX_LENGTH)
     self.assertEqual(url_without_campain, res_long_url)

Example #6

0

Show file

 def save(self):
     """Subscribe the form's user to the web feed described by the form.

     Warning: the source will be saved as well as the related objects
     (no commit options).

     Returns the WebFeed the user has been subscribed to, or None when
     the user already follows a feed whose source has this URL.
     """
     form_url, _ = sanitize_url(self.cleaned_data["url"])
     form_title = self.cleaned_data["title"]
     form_feed_url, _ = sanitize_url(self.cleaned_data["feed_url"])
     if self.user.userprofile.web_feeds.filter(
             source__url=form_url).exists():
         # nothing to do
         return None
     # try a bigger look-up anyway: the feed may be known to other users
     same_sources = WebFeed.objects.filter(source__url=form_url).all()
     # url are unique for sources
     if same_sources:
         new_feed = same_sources[0]
         # The source of the existing feed is needed below to attach it
         # to the user's profile; without this assignment the name
         # would be unbound on this path.
         source_ref = new_feed.source
     else:
         if form_title:
             source_title = form_title
         else:
             source_title = build_reference_title_from_url(form_url)
         try:
             source_ref = Reference.objects.get(url=form_url)
         except ObjectDoesNotExist:
             source_ref = Reference(url=form_url,
                                    title=source_title,
                                    pub_date=datetime.now(timezone.utc))
             source_ref.save()
         new_feed = WebFeed(source=source_ref)
         # assume that either form_feed_url or form_url have been
         # validated as a valid feed url
         new_feed.xmlURL = form_feed_url or form_url
         # Start from the epoch as the date of the last update check.
         new_feed.last_update_check = (
             datetime.utcfromtimestamp(0).replace(tzinfo=timezone.utc))
         new_feed.save()
     self.user.userprofile.sources.add(source_ref)
     self.user.userprofile.public_sources.add(source_ref)
     self.user.userprofile.web_feeds.add(new_feed)
     return new_feed

Example #7

0

Show file

File: tasks.py Project: rishabht1/wateronmars

def create_reference_from_feedparser_entry(entry, date, previous_ref):
    """Build (or refresh) a Reference from a FeedParser entry, giving it
    the publication date passed in argument.

    If the corresponding Reference already exists, it must be given as
    the previous_ref argument, and if previous_ref is None, it will be
    assumed that there is no matching Reference in the db.

    Note: Enforce Dave Winer's recommendation for linkblog:
    http://scripting.com/2014/04/07/howToDisplayTitlelessFeedItems.html
    with a little twist: if a feed item has no title we will use the
    (possibly truncated) description as a title and if there is no
    description the link will be used. In any case the description of a
    reference is set even if this description is also used for the
    title.

    A missing or null description is handled as an empty string.

    Return a tuple with the unsaved reference and the set of tag names.
    """
    url = entry.link
    info = ""
    # Normalise once so that neither the title fallback nor the final
    # join ever has to cope with None.
    description = entry.get("description") or ""
    tags = set()
    if entry.get("tags", None):
        tags = {t.term for t in entry.tags}
    if previous_ref is None:
        url_truncated, did_truncate = sanitize_url(url)
        if did_truncate:
            # Save the full url in info to limit the loss of information
            info = u"<WOM had to truncate the following URL: %s>" % url
            logger.warning(
                "Found an url of length %d (>%d) "
                "when importing references from feed.",
                len(url), URL_MAX_LENGTH)
        url = url_truncated
        # set the title only for new ref (should avoid weird behaviour
        # from the user point of view)
        title = truncate_reference_title(
            HTMLUnescape(entry.get("title")
                         or strip_tags(description)
                         or url))
        ref = Reference(url=url, title=title)
    else:
        ref = previous_ref
    # info is empty unless the URL was truncated; the separating space
    # is kept in both cases.
    ref.description = " ".join((info, description))
    ref.pub_date = date
    return (ref, tags)

Example #8

0

Show file

File: tasks.py Project: DarKnight24/wateronmars

def create_reference_from_feedparser_entry(entry,date,previous_ref):
  """Build (or refresh) a Reference from a FeedParser entry, giving it
  the publication date passed in argument.

  If the corresponding Reference already exists, it must be given as
  the previous_ref argument, and if previous_ref is None, it will be
  assumed that there is no matching Reference in the db.

  Note: Enforce Dave Winer's recommendation for linkblog:
  http://scripting.com/2014/04/07/howToDisplayTitlelessFeedItems.html
  with a little twist: if a feed item has no title we will use the
  (possibly truncated) description as a title and if there is no
  description the link will be used. In any case the description of a
  reference is set even if this description is also used for the
  title.

  A missing or null description is handled as an empty string.

  Return a tuple with the unsaved reference and the set of tag names.
  """
  url = entry.link
  info = ""
  # Normalise once so that neither the title fallback nor the final
  # join ever has to cope with None.
  description = entry.get("description") or ""
  tags = set()
  if entry.get("tags",None):
    tags = {t.term for t in entry.tags}
  if previous_ref is None:
    url_truncated,did_truncate = sanitize_url(url)
    if did_truncate:
      # Save the full url in info to limit the loss of information
      info = u"<WOM had to truncate the following URL: %s>" % url
      logger.warning(
        "Found an url of length %d (>%d) "
        "when importing references from feed.",
        len(url), URL_MAX_LENGTH)
    url = url_truncated
    # set the title only for new ref (should avoid weird behaviour
    # from the user point of view)
    title = truncate_reference_title(
      HTMLUnescape(entry.get("title")
                   or strip_tags(description)
                   or url))
    ref = Reference(url=url,title=title)
  else:
    ref = previous_ref
  # info is empty unless the URL was truncated; the separating space
  # is kept in both cases.
  ref.description = " ".join((info,description))
  ref.pub_date = date
  return (ref,tags)

Example #9

0

Show file

File: tasks.py Project: tibonihoo/wateronmars

def create_reference_from_tweet_summary(
    summary, summary_url, title, date, previous_ref):
  """Build (or refresh) a Reference from the html summary of a tweet
  timeline, giving it the publication date passed in argument.

  If the corresponding Reference already exists, it must be given as
  the previous_ref argument, and if previous_ref is None, it will be
  assumed that there is no matching Reference in the db.

  When a new Reference is created, summary_url must not need any
  truncation (this is asserted) and the title is truncated with
  truncate_reference_title.

  Return the reference (new or previous_ref) with its description and
  publication date updated; nothing is saved by this function.
  """
  if previous_ref is None:
    # Only the truncation flag is checked here: the reference is built
    # with summary_url exactly as it was passed in.
    _, did_truncate = sanitize_url(summary_url)
    assert not did_truncate, "summary_url is too long to be stored"
    title_truncated = truncate_reference_title(title)
    ref = Reference(url=summary_url,title=title_truncated)
  else:
    ref = previous_ref
  ref.description = summary
  ref.pub_date = date
  return ref

Example #10

0

Show file

File: tests.py Project: rishabht1/wateronmars

 def test_sanitize_url_on_long_ascii_url(self):
     """Check that an ASCII URL longer than the limit gets truncated."""
     # "http://" followed by URL_MAX_LENGTH filler characters.
     filler = "u" * URL_MAX_LENGTH
     long_url = "http://" + filler
     sanitized = sanitize_url(long_url)
     res_long_url, did_truncate = sanitized
     self.assertTrue(did_truncate)
     self.assertLessEqual(len(res_long_url), URL_MAX_LENGTH)

Example #11

0

Show file

File: tests.py Project: DarKnight24/wateronmars

 def test_sanitize_url_with_spaces(self):
   """Check that a space in a short URL is escaped as %20 and that no
   truncation is reported."""
   short_url = u"http://m m"
   # Sanity check: the input is below the length limit.
   self.assertLess(len(short_url), URL_MAX_LENGTH)
   expected = ("http://m%20m", False)
   self.assertEqual(expected, sanitize_url(short_url))

Example #12

0

Show file

File: tests.py Project: DarKnight24/wateronmars

 def test_sanitize_url_with_non_ascii_characters(self):
   """Check that non-ASCII characters in a short URL are percent-escaped
   and that no truncation is reported."""
   short_url = "http://méhœñ۳予"
   # Sanity check: the input is below the length limit.
   self.assertLess(len(short_url), URL_MAX_LENGTH)
   expected = ("http://m%C3%A9h%C5%93%C3%B1%DB%B3%E4%BA%88", False)
   self.assertEqual(expected, sanitize_url(short_url))

Example #13

0

Show file

File: tests.py Project: rishabht1/wateronmars

 def test_sanitize_url_with_non_ascii_characters(self):
     """Check that non-ASCII characters in a short URL are
     percent-escaped and that no truncation is reported."""
     short_url = "http://méhœñ۳予"
     # Sanity check: the input is below the length limit.
     self.assertLess(len(short_url), URL_MAX_LENGTH)
     expected = ("http://m%C3%A9h%C5%93%C3%B1%DB%B3%E4%BA%88", False)
     self.assertEqual(expected, sanitize_url(short_url))

Example #14

0

Show file

File: tests.py Project: DarKnight24/wateronmars

 def test_sanitize_url_on_long_ascii_url(self):
   """Check that an ASCII URL longer than the limit gets truncated."""
   # "http://" followed by URL_MAX_LENGTH filler characters.
   filler = "u" * URL_MAX_LENGTH
   long_url = "http://" + filler
   sanitized = sanitize_url(long_url)
   res_long_url, did_truncate = sanitized
   self.assertTrue(did_truncate)
   self.assertLessEqual(len(res_long_url), URL_MAX_LENGTH)

Example #15

0

Show file

File: tests.py Project: DarKnight24/wateronmars

 def test_sanitize_url_on_short_ascii_url(self):
   """Check that a short plain-ASCII URL comes back unchanged and is
   not reported as truncated."""
   short_url = "http://mouf"
   # Sanity check: the input is below the length limit.
   self.assertLess(len(short_url), URL_MAX_LENGTH)
   expected = (short_url, False)
   self.assertEqual(expected, sanitize_url(short_url))

Example #16

0

Show file

File: forms.py Project: DarKnight24/wateronmars

 def save(self):
   """Save a bookmark of the form's URL for the form's user.

   Warning: the bookmark will be saved as well as the related objects
   (no commit options).

   The bookmarked reference and its source are looked up by URL and
   created when missing. The user's comment is updated, the source is
   added to the user's sources and the user's existing statuses for the
   reference are flagged as saved.

   Returns the bookmark.
   """
   url, _ = sanitize_url(self.cleaned_data["url"])
   title = self.cleaned_data["title"] \
           or build_reference_title_from_url(url)
   comment = self.cleaned_data["comment"]
   pub_date = self.cleaned_data["pub_date"] \
              or datetime.now(timezone.utc)
   src_url, _ = sanitize_url(self.cleaned_data["source_url"]
                             or build_source_url_from_reference_url(url))
   src_title = self.cleaned_data["source_title"] \
               or build_reference_title_from_url(src_url)
   # Find a matching reference. The look-up is kept on its own so that
   # only a missing reference (and not a reference without any source)
   # leads to the creation of a new one.
   try:
     bookmarked_ref = Reference.objects.get(url=url)
   except ObjectDoesNotExist:
     bookmarked_ref = None
   ref_src = None
   if bookmarked_ref is not None:
     # Arbitrarily choose one of the possible sources (if any)
     known_sources = list(bookmarked_ref.sources.all()[:1])
     if known_sources:
       ref_src = known_sources[0]
   if ref_src is None:
     # Either the reference is new or it has no recorded source: fall
     # back on the source URL derived from the form.
     try:
       ref_src = Reference.objects.get(url=src_url)
     except ObjectDoesNotExist:
       ref_src = Reference(url=src_url, title=src_title, pub_date=pub_date)
       ref_src.save()
   if bookmarked_ref is None:
     if src_url == url:
       # The reference is its own source.
       bookmarked_ref = ref_src
     else:
       bookmarked_ref = Reference(url=url,
                                  title=title,
                                  pub_date=pub_date)
       bookmarked_ref.save()
       bookmarked_ref.sources.add(ref_src)
   # NOTE(review): transaction.commit_on_success was removed in Django
   # 1.8 in favour of transaction.atomic; confirm the Django version in
   # use before upgrading.
   with transaction.commit_on_success():
     try:
       bmk = UserBookmark.objects.get(owner=self.user,
                                      reference=bookmarked_ref)
     except ObjectDoesNotExist:
       bmk = UserBookmark(owner=self.user, reference=bookmarked_ref,
                          saved_date=datetime.now(timezone.utc))
       bookmarked_ref.save_count += 1
       bmk.save()
       bookmarked_ref.save()
     # allow the user-specific comment to be changed and also prefix
     # it with the user specified title if it differs from the
     # existing reference title.
     if comment:
       new_comment = comment
     else:
       new_comment = bmk.comment
     if self.cleaned_data["title"] and title != bookmarked_ref.title \
        and not new_comment.startswith(title):
       new_comment = "%s: %s" % (title, new_comment)
     if new_comment != bmk.comment:
       bmk.comment = new_comment
       bmk.save()
   with transaction.commit_on_success():
     if ref_src not in self.user.userprofile.sources.all():
       self.user.userprofile.sources.add(ref_src)
   with transaction.commit_on_success():
     for rust in ReferenceUserStatus\
       .objects.filter(owner=self.user,
                       reference=bookmarked_ref).all():
       rust.has_been_saved = True
       rust.save()
   return bmk

Example #17

0

Show file

File: tests.py Project: rishabht1/wateronmars

 def test_sanitize_url_on_short_ascii_url(self):
     """Check that a short plain-ASCII URL comes back unchanged and is
     not reported as truncated."""
     short_url = "http://mouf"
     # Sanity check: the input is below the length limit.
     self.assertLess(len(short_url), URL_MAX_LENGTH)
     expected = (short_url, False)
     self.assertEqual(expected, sanitize_url(short_url))

Example #18

0

Show file

 def save(self):
     """Save a bookmark of the form's URL for the form's user.

     Warning: the bookmark will be saved as well as the related objects
     (no commit options).

     The bookmarked reference and its source are looked up by URL and
     created when missing. The user's comment is updated, the source is
     added to the user's sources and the user's existing statuses for
     the reference are flagged as saved.

     Returns the bookmark.
     """
     url, _ = sanitize_url(self.cleaned_data["url"])
     title = self.cleaned_data["title"] \
             or build_reference_title_from_url(url)
     comment = self.cleaned_data["comment"]
     pub_date = self.cleaned_data["pub_date"] \
                or datetime.now(timezone.utc)
     src_url, _ = sanitize_url(self.cleaned_data["source_url"]
                               or build_source_url_from_reference_url(url))
     src_title = self.cleaned_data["source_title"] \
                 or build_reference_title_from_url(src_url)
     # Find a matching reference. The look-up is kept on its own so
     # that only a missing reference (and not a reference without any
     # source) leads to the creation of a new one.
     try:
         bookmarked_ref = Reference.objects.get(url=url)
     except ObjectDoesNotExist:
         bookmarked_ref = None
     ref_src = None
     if bookmarked_ref is not None:
         # Arbitrarily choose one of the possible sources (if any)
         known_sources = list(bookmarked_ref.sources.all()[:1])
         if known_sources:
             ref_src = known_sources[0]
     if ref_src is None:
         # Either the reference is new or it has no recorded source:
         # fall back on the source URL derived from the form.
         try:
             ref_src = Reference.objects.get(url=src_url)
         except ObjectDoesNotExist:
             ref_src = Reference(url=src_url,
                                 title=src_title,
                                 pub_date=pub_date)
             ref_src.save()
     if bookmarked_ref is None:
         if src_url == url:
             # The reference is its own source.
             bookmarked_ref = ref_src
         else:
             bookmarked_ref = Reference(url=url,
                                        title=title,
                                        pub_date=pub_date)
             bookmarked_ref.save()
             bookmarked_ref.sources.add(ref_src)
     # NOTE(review): transaction.commit_on_success was removed in
     # Django 1.8 in favour of transaction.atomic; confirm the Django
     # version in use before upgrading.
     with transaction.commit_on_success():
         try:
             bmk = UserBookmark.objects.get(owner=self.user,
                                            reference=bookmarked_ref)
         except ObjectDoesNotExist:
             bmk = UserBookmark(owner=self.user,
                                reference=bookmarked_ref,
                                saved_date=datetime.now(timezone.utc))
             bookmarked_ref.save_count += 1
             bmk.save()
             bookmarked_ref.save()
         # allow the user-specific comment to be changed and also prefix
         # it with the user specified title if it differs from the
         # existing reference title.
         if comment:
             new_comment = comment
         else:
             new_comment = bmk.comment
         if self.cleaned_data["title"] and title != bookmarked_ref.title \
            and not new_comment.startswith(title):
             new_comment = "%s: %s" % (title, new_comment)
         if new_comment != bmk.comment:
             bmk.comment = new_comment
             bmk.save()
     with transaction.commit_on_success():
         if ref_src not in self.user.userprofile.sources.all():
             self.user.userprofile.sources.add(ref_src)
     with transaction.commit_on_success():
         for rust in ReferenceUserStatus\
           .objects.filter(owner=self.user,
                           reference=bookmarked_ref).all():
             rust.has_been_saved = True
             rust.save()
     return bmk

Example #19

0

Show file

File: tests.py Project: rishabht1/wateronmars

 def test_sanitize_url_with_spaces(self):
     """Check that a space in a short URL is escaped as %20 and that no
     truncation is reported."""
     short_url = u"http://m m"
     # Sanity check: the input is below the length limit.
     self.assertLess(len(short_url), URL_MAX_LENGTH)
     expected = ("http://m%20m", False)
     self.assertEqual(expected, sanitize_url(short_url))