def forwards(self, orm):
    """Backfill ``IncludedPage`` rows from the content of every page.

    For each page, the names returned by ``extract_included_pagenames`` are
    resolved to a page with the matching slug in the same region, then:

    * if the source page already has a row pointing at the resolved page,
      nothing is done;
    * else if it has a row recorded under the same name (case-insensitive),
      that row is pointed at the resolved page, when there is one;
    * otherwise a new row is created, with ``included_page`` left as ``None``
      when no target page exists.

    :param orm: object giving access to the models, used here as
        ``orm['pages.Page']`` and ``orm.IncludedPage``.
    """
    from pages.models import slugify
    from links import extract_included_pagenames

    pages = orm['pages.Page'].objects
    includes = orm.IncludedPage.objects

    for page in pages.all().iterator():
        region = page.region
        included_pages = extract_included_pagenames(page.content)
        # Parenthesised so the line means the same thing whether ``print`` is
        # a statement or a function.
        print("..recording included pages on %s" % smart_str(page.name))

        for pagename in included_pages:
            candidates = pages.filter(slug=slugify(pagename), region=region)
            included_page = candidates[0] if candidates else None

            # Only take the shortcut for a resolved target. Filtering on
            # ``included_page=None`` matches *any* unresolved include of this
            # page, which would wrongly skip every dangling include after
            # the first one.
            if included_page is not None and includes.filter(
                    source=page, included_page=included_page).exists():
                continue

            same_name = includes.filter(
                source=page, included_page_name__iexact=pagename)
            if same_name.exists():
                # Already recorded by name; attach the target if it resolves.
                if included_page is not None:
                    included = same_name[0]
                    included.included_page = included_page
                    included.save()
            else:
                included = orm.IncludedPage(
                    source=page,
                    region=region,
                    included_page=included_page,
                    included_page_name=pagename,
                )
                included.save()
def forwards(self, orm):
    """Backfill ``IncludedPage`` rows from the content of every page.

    For each page, the names returned by ``extract_included_pagenames`` are
    resolved to a page with the matching slug in the same region, then:

    * if the source page already has a row pointing at the resolved page,
      nothing is done;
    * else if it has a row recorded under the same name (case-insensitive),
      that row is pointed at the resolved page, when there is one;
    * otherwise a new row is created, with ``included_page`` left as ``None``
      when no target page exists.

    :param orm: object giving access to the models, used here as
        ``orm['pages.Page']`` and ``orm.IncludedPage``.
    """
    from pages.models import slugify
    from links import extract_included_pagenames

    pages = orm['pages.Page'].objects
    includes = orm.IncludedPage.objects

    for page in pages.all().iterator():
        region = page.region
        included_pages = extract_included_pagenames(page.content)
        # Parenthesised so the line means the same thing whether ``print`` is
        # a statement or a function.
        print("..recording included pages on %s" % smart_str(page.name))

        for pagename in included_pages:
            candidates = pages.filter(slug=slugify(pagename), region=region)
            included_page = candidates[0] if candidates else None

            # Only take the shortcut for a resolved target. Filtering on
            # ``included_page=None`` matches *any* unresolved include of this
            # page, which would wrongly skip every dangling include after
            # the first one.
            if included_page is not None and includes.filter(
                    source=page, included_page=included_page).exists():
                continue

            same_name = includes.filter(
                source=page, included_page_name__iexact=pagename)
            if same_name.exists():
                # Already recorded by name; attach the target if it resolves.
                if included_page is not None:
                    included = same_name[0]
                    included.included_page = included_page
                    included.save()
            else:
                included = orm.IncludedPage(
                    source=page,
                    region=region,
                    included_page=included_page,
                    included_page_name=pagename,
                )
                included.save()
def record_page_includes(page):
    """Bring the ``IncludedPage`` rows of ``page`` in line with its content.

    A row is created for every name returned by
    ``extract_included_pagenames(page.content)`` that is not yet recorded for
    this page and region (matched by slug). Rows whose slug is no longer among
    the names found in the content are then deleted.

    :param page: the source page; its ``region`` and ``content`` are read.
    """
    region = page.region
    pagenames = extract_included_pagenames(page.content)
    current_slugs = []

    for pagename in pagenames:
        # Slugify once per name; the slug drives both lookups, the new row
        # and the clean-up below.
        slug = slugify(pagename)
        current_slugs.append(slug)

        already_recorded = IncludedPage.objects.filter(
            source=page, region=region, included_page_slug=slug).exists()
        if already_recorded:
            continue

        # The target may not exist (yet); the include is recorded anyway.
        targets = Page.objects.filter(slug=slug, region=region)
        included_page = targets[0] if targets else None
        IncludedPage(
            source=page,
            region=region,
            included_page=included_page,
            included_page_name=pagename,
            included_page_slug=slug,
        ).save()

    # Remove included pages they've removed from the page.
    stale = IncludedPage.objects.filter(
        source=page, region=region,
    ).exclude(included_page_slug__in=current_slugs)
    # Deleted one instance at a time, as before, so each row still goes
    # through its own ``delete()`` call.
    for record in stale:
        record.delete()
def record_page_includes(page):
    """Bring the ``IncludedPage`` rows of ``page`` in line with its content.

    A row is created for every name returned by
    ``extract_included_pagenames(page.content)`` that is not yet recorded for
    this page and region (matched by slug). Rows whose slug is no longer among
    the names found in the content are then deleted.

    :param page: the source page; its ``region`` and ``content`` are read.
    """
    region = page.region
    pagenames = extract_included_pagenames(page.content)
    current_slugs = []

    for pagename in pagenames:
        # Slugify once per name; the slug drives both lookups, the new row
        # and the clean-up below.
        slug = slugify(pagename)
        current_slugs.append(slug)

        already_recorded = IncludedPage.objects.filter(
            source=page, region=region, included_page_slug=slug).exists()
        if already_recorded:
            continue

        # The target may not exist (yet); the include is recorded anyway.
        targets = Page.objects.filter(slug=slug, region=region)
        included_page = targets[0] if targets else None
        IncludedPage(
            source=page,
            region=region,
            included_page=included_page,
            included_page_name=pagename,
            included_page_slug=slug,
        ).save()

    # Remove included pages they've removed from the page.
    stale = IncludedPage.objects.filter(
        source=page, region=region,
    ).exclude(included_page_slug__in=current_slugs)
    # Deleted one instance at a time, as before, so each row still goes
    # through its own ``delete()`` call.
    for record in stale:
        record.delete()
def test_ignore_other_links(self):
    # Neither link below carries the "includepage" class, so no page name
    # should be extracted from this markup.
    html = (
        ' <p>I love <a href="Parks">outside</a>.</p>'
        ' <p>I love <a href="http://example.org/Night">test</a>.</p> '
    )
    included_pagenames = extract_included_pagenames(html)
    self.assertFalse('Parks' in included_pagenames)
    # assertEqual (rather than assertTrue on a comparison) shows the
    # unexpected contents in the failure message.
    self.assertEqual(included_pagenames, [])
def test_link_unquoting(self):
    # The same page is included twice: once with a percent-encoded href and
    # once with a literal space in the href.
    markup = (
        ' <p>I love <a href="Cats%20and%20dogs"'
        ' class="includepage plugin right"></a>.</p>'
        ' <p>I love <a href="Cats and dogs"'
        ' class="plugin includepage left"></a>.</p> '
    )
    names = extract_included_pagenames(markup)
    # The decoded name must be reported; the encoded form must not.
    self.assertTrue('Cats and dogs' in names)
    self.assertFalse('Cats%20and%20dogs' in names)
def test_ignore_anchors(self):
    # One real include, plus a fragment-only link and an <a> with no href;
    # only the include should be picked up.
    markup = (
        ' <p>I love <a href="Parks" class="plugin includepage">'
        'outside</a>.</p>'
        ' <p>I love <a href="#gohere">test</a>.</p>'
        ' <p>I love <a>test now</a>.</p> '
    )
    names = extract_included_pagenames(markup)
    self.assertTrue('Parks' in names)
    self.assertEqual(len(names), 1)
def test_simple_extraction(self):
    # A single include link: its href should come back as a page name.
    markup = ' <p>I love <a href="Parks" class="plugin includepage"></a>.</p> '
    names = extract_included_pagenames(markup)
    self.assertTrue('Parks' in names)