def record_tag_includes(page):
    """Sync the IncludedTagList rows for ``page`` with the tags its content includes.

    The slugs returned by ``extract_included_tags(page.content)`` are the
    desired state.  For each slug that has no IncludedTagList row for this
    page and region, a row is created, provided a Tag with that slug exists
    in the page's region.  Rows whose tag slug is no longer among the
    extracted slugs are then deleted.

    :param page: the page whose ``content`` and ``region`` are read.
    """
    region = page.region
    included = extract_included_tags(page.content)

    for tag_slug in included:
        # Only existence matters here, so ask the database for exactly that
        # rather than loading the matching rows.
        already_recorded = IncludedTagList.objects.filter(
            source=page,
            region=region,
            included_tag__slug=tag_slug,
        ).exists()
        if already_recorded:
            continue

        matching_tags = Tag.objects.filter(slug=tag_slug, region=region)
        if not matching_tags:
            # The page refers to a slug with no Tag in this region; skip it.
            continue

        # The truthiness test above already evaluated the queryset, so
        # indexing it does not hit the database again.
        IncludedTagList(
            source=page,
            region=region,
            included_tag=matching_tags[0],
        ).save()

    # Remove tag lists they've removed from the page
    to_delete = IncludedTagList.objects.filter(
        source=page, region=region).exclude(included_tag__slug__in=included)
    # Deleted one instance at a time so each instance's delete() is invoked.
    for stale in to_delete:
        stale.delete()
def record_tag_includes(page):
    """Record which tags ``page`` includes, and drop records that no longer apply.

    Reads the included tag slugs from ``page.content`` via
    ``extract_included_tags``.  A missing IncludedTagList row is created for
    every slug that matches a Tag in ``page.region``; slugs with no matching
    Tag are ignored.  Finally, IncludedTagList rows for this page and region
    whose tag slug is not in the extracted list are deleted.

    :param page: the page whose ``content`` and ``region`` are read.
    """
    region = page.region
    included = extract_included_tags(page.content)

    for tag_slug in included:
        # .exists() issues a cheap existence query instead of fetching the
        # rows just to test truthiness.
        if IncludedTagList.objects.filter(
                source=page, region=region,
                included_tag__slug=tag_slug).exists():
            continue

        candidates = Tag.objects.filter(slug=tag_slug, region=region)
        if not candidates:
            # No Tag with this slug in the region, so nothing to link to.
            continue

        # ``candidates`` was evaluated by the check above; [0] reads from
        # the cached result.
        record = IncludedTagList(
            source=page,
            region=region,
            included_tag=candidates[0],
        )
        record.save()

    # Remove tag lists they've removed from the page
    to_delete = IncludedTagList.objects.filter(
        source=page, region=region).exclude(included_tag__slug__in=included)
    # Per-instance deletion, so each instance's delete() is invoked.
    for record in to_delete:
        record.delete()
def test_ignore_other_links(self):
    """Ordinary links, like the two anchors below, yield no included tags."""
    html = """ <p>I love <a href="Parks">outside</a>.</p> <p>I love <a href="http://example.org/Night">test</a>.</p> """
    included_tags = extract_included_tags(html)
    # assertNotIn / assertEqual report the offending values on failure,
    # unlike assertFalse / assertTrue on a precomputed boolean.
    self.assertNotIn('parks', included_tags)
    self.assertEqual(included_tags, [])
def forwards(self, orm):
    """Create IncludedTagList rows for every existing page.

    For each page in the frozen ``pages.Page`` model, the included tag slugs
    are extracted from the page content.  A slug with no ``tags.Tag`` in the
    page's region is skipped, as is a (page, tag) pair that already has an
    IncludedTagList row; otherwise a new row is saved.

    :param orm: the frozen ORM passed in by the migration runner.
    """
    from links import extract_included_tags

    for page in orm['pages.Page'].objects.all().iterator():
        region = page.region
        tag_slugs = extract_included_tags(page.content)
        # Single-argument parenthesised form: prints the same under the
        # Python 2 print statement and also parses as a function call.
        print("..recording included tags on %s" % smart_str(page.name))

        for tag_slug in tag_slugs:
            matching_tags = orm['tags.Tag'].objects.filter(
                slug=tag_slug, region=region)
            if not matching_tags:
                # The page refers to a slug with no Tag in this region.
                continue
            tag = matching_tags[0]

            # Skip pairs that are already recorded.
            if orm.IncludedTagList.objects.filter(
                    source=page, included_tag=tag).exists():
                continue

            orm.IncludedTagList(
                source=page,
                region=region,
                included_tag=tag,
            ).save()
def test_case_insensitive(self):
    """An include link written as ``tags%2FPARKS`` yields the slug 'parks'."""
    html = """ <p>I love <a href="tags%2FPARKS" class="plugin includetag"></a>.</p> """
    included_tags = extract_included_tags(html)
    # assertIn shows the container's contents when the check fails.
    self.assertIn('parks', included_tags)
def test_simple_extraction(self):
    """An include link to ``tags%2Fparks`` yields the slug 'parks'."""
    html = """ <p>I love <a href="tags%2Fparks" class="plugin includetag"></a>.</p> """
    included_tags = extract_included_tags(html)
    # assertIn shows the container's contents when the check fails.
    self.assertIn('parks', included_tags)