def test_store_best_content_should_do_nothing_if_content_is_already_set(self):
    # A page stored with pending=False should make the store-best-content
    # task return without ever querying Content.for_url.
    settled_page = Page(url=some_url, owner=a_user, pending=False)
    settled_page.put()

    expect(Content).for_url.never()

    page.task_store_best_content(settled_page.key())
def test_should_convert_v0_page_to_v1_page(self):
    # A Page stored with version=0 should read back as version 1 with its
    # data intact, and no deferred task should be queued along the way.
    #
    # for reference, the v0 properties are:
    #class Version1Page(db.Model):
    #    version = db.IntegerProperty()
    #    url = db.URLProperty(required=True)
    #    _content_url = db.URLProperty()
    #    content = db.TextProperty()
    #    title = db.StringProperty()
    #    owner = db.UserProperty(required=True)
    #    date = db.DateTimeProperty(auto_now_add=True)
    #    _messages = db.StringListProperty()
    expect(page.deferred).defer.never()

    legacy_page = Page(
        version=0,
        url=some_url,
        _content_url=some_url + "/content",
        content='content',
        title='title',
        owner=a_user,
        _messages=['info something bad happened...'],
    )
    legacy_page.put()

    upgraded = Page.get(legacy_page.key())

    self.assertEqual(upgraded.version, 1)
    self.assertEqual(upgraded.url, some_url)
    self.assertEqual(upgraded.content, 'content')
    self.assertEqual(upgraded.title, "title")
    self.assertEqual(upgraded.owner, a_user)
    self.assertEqual(upgraded._messages, ['info something bad happened...'])
    self.assertEqual(upgraded.content_url, some_url + "/content")
def test_find_complete_should_skip_incomplete_pages(self):
    # Two pages share the same url and owner: one created without content,
    # one created with content='' (an empty string). Only the latter is
    # expected back from Page.find_complete.
    incomplete = Page(url=some_url, owner=a_user)
    complete = Page(url=some_url, owner=a_user, content='')

    # put() is called purely for its side effect, so use a plain loop
    # rather than building a throwaway list.
    for entity in (incomplete, complete):
        entity.put()

    pages = list(Page.find_complete(a_user))

    self.assertEqual(len(pages), 1)
    self.assertEqual(pages[0].key(), complete.key())
def test_should_log_error_and_ignore_transforms_if_they_fail(self):
    # When Transform.process raises TransformError, the page should record
    # the error message and content extraction should still be deferred
    # (twice, per the expectation below).
    modify(page).content_extractors = []
    subject = Page(url=some_url, owner=a_user)

    transform_failure = page.TransformError("transform failed")
    expect(page.Transform).process(subject).and_raise(transform_failure)
    expect(subject).error("transform failed")
    expect(deferred).defer(page.task_extract_content, *any_args).twice()

    subject.start_content_population()
def test_should_accept_the_best_content(self):
    # Given two Content candidates for a pending page, the stored page
    # should end up with the title and body of the second candidate.
    pending_page = Page(url=some_url, owner=a_user, pending=True)
    sparse = Content(url=some_url, title='bad', body='not much')
    rich = Content(url=some_url, title='good', body='much more voluminous content')
    put(pending_page, sparse, rich)

    expect(Content).for_url(some_url).and_return([sparse, rich])

    page_module.task_store_best_content(pending_page.key())

    stored = Page.get(pending_page.key())
    self.assertEqual(stored.title, rich.title)
    self.assertEqual(stored.content, rich.body)
def test_page_should_launch_tasks_to_populate_data(self):
    # Starting content population should run the transform on the page and
    # defer one extraction task per extractor name ('native', 'view_text').
    subject = Page(url=some_url, owner=a_user)
    subject.put()

    expect(page.Transform).process(subject)
    for extractor_name in ('native', 'view_text'):
        expect(deferred).defer(page.task_extract_content, extractor_name, subject.key())

    subject.start_content_population()
def setUp(self):
    # Fixture: create the test app and store two more pages than
    # items_per_page (10), each with a distinct url.
    self.app = fixtures.app()
    self.items_per_page = 10

    # NOTE(review): this instance is never used below; it is kept in case
    # constructing PageDriver has side effects -- confirm and remove if not.
    unused_driver = PageDriver()

    total_pages = self.items_per_page + 2
    for index in range(total_pages):
        Page(url='http://localhost/?%s' % (index,), owner=a_user, content='some content').put()

    # Diagnostic output: how many pages the datastore reports (capped at 100).
    stored_pages = Page.all().fetch(100)
    print("REALLY THERE ARE %s pages" % (len(stored_pages),))
def delete(self):
    """Delete the page found for the current user and request url.

    Redirects to '/' after deleting unless the request is AJAX.

    Raises:
        HttpError: 404 when no matching page is found.
    """
    page = Page.find(owner=self.user(), url=self.url())

    # Guard clause: nothing to delete, so log it and report 404.
    if not page:
        info("could not find page: %s" % (self.url(),))
        raise HttpError(404, "could not find saved page: %s" % cgi.escape(self.url()))

    page.delete()
    if not self.is_ajax():
        self.redirect('/')
def get(self, handle, email):
    """Render 'feed.rss' for the user identified by ``email``.

    Args:
        handle: value converted with int() and checked by UserID.auth.
        email: URL-quoted email address; unquoted before use.

    Raises:
        HttpError: 403 when UserID.auth rejects the email/handle pair.
    """
    email = urllib2.unquote(email)

    authorised = UserID.auth(email, int(handle))
    if not authorised:
        info("invalid credentials: %s-%s" % (email, handle))
        raise HttpError(403, "invalid credentials... ")

    user = users.User(email)
    nickname = user.nickname()
    complete_pages = Page.find_complete(user)
    template_values = {
        'user': nickname,
        'pages': complete_pages,
        'uri': self.uri(),
    }
    debug("template values: %r" % (template_values,))

    rendered = render('feed.rss', template_values)
    self.response.out.write(rendered)
def test_store_best_content_should_store_empty_values_if_forced(self):
    # Six extractors are configured but Content.for_url yields nothing.
    # With force=True the task should still store a result: the title
    # '[localhost saved item]' and no content.
    p = Page(url=some_url, owner=a_user)
    p.put()
    modify(page).content_extractors = [1, 2, 3, 4, 5, 6]
    when(Content).for_url(p.url).then_return([])

    page.task_store_best_content(p.key(), force=True)

    p = Page.get(p.key())
    # assertEqual replaces the deprecated assertEquals alias, matching the
    # spelling used by the other tests here.
    self.assertEqual(p.title, '[localhost saved item]')
    self.assertEqual(p.content, None)
def test_store_best_content_should_do_nothing_if_not_all_processors_have_completed(self):
    # Six extractors are configured but only one Content result exists, so
    # the (unforced) task should leave the page's title and content unset.
    p = Page(url=some_url, owner=a_user)
    p.put()
    modify(page).content_extractors = [1, 2, 3, 4, 5, 6]
    when(Content).for_url(p.url).then_return([Content(url=some_url)])

    page.task_store_best_content(p.key())

    p = Page.get(p.key())
    # assertEqual replaces the deprecated assertEquals alias, matching the
    # spelling used by the other tests here.
    self.assertEqual(p._title, None)
    self.assertEqual(p.content, None)
def test_reset_should_clear_all_content(self):
    # A forced update should clear content and raw content while leaving
    # the title as it was.
    p = Page(url=some_url, owner=a_user)
    p.content = "content!"
    p._raw_content = "raw content!"
    p.title = "title"

    p.update(force=True)

    # Use assertEqual throughout; the original mixed it with the
    # deprecated assertEquals alias inside this one test.
    self.assertEqual(p.content, None)
    self.assertEqual(p.raw_content, None)
    self.assertEqual(p.title, "title")
def test_store_best_content_should_do_so_if_all_extractors_are_complete(self):
    # Two extractors are configured and two Content results exist, so the
    # task should copy the title and body of the better result to the page.
    p = Page(url=some_url, owner=a_user, pending=True)
    p.put()
    modify(page).content_extractors = [1, 2]

    best_content = Content(url=some_url, title='best title', body='best body')
    worst_content = Content(url=some_url)
    contents = [best_content, worst_content]
    when(Content).for_url(p.url).then_return(contents)

    page.task_store_best_content(p.key())

    p = Page.get(p.key())
    # assertEqual replaces the deprecated assertEquals alias, matching the
    # spelling used by the other tests here.
    self.assertEqual(p.title, "best title")
    self.assertEqual(p.content, "best body")
def _add(self, user, url, success = None, force=False):
    """Create the page for ``url`` or update the existing one.

    Args:
        user: owner passed to Page.find when looking for an existing page.
        url: url of the page to add.
        success: optional callable; invoked with the page when the page has
            no errors and was newly created or force-updated.
        force: passed through to Page.update for an existing page.

    Returns:
        The page when it was newly created or force-updated, else None.
    """
    page = Page.find(user, url)
    if page is not None:
        page.update(force=force)
        new_page = page if force else None
    else:
        # NOTE(review): the lookup uses ``user`` but creation uses
        # self.user() -- presumably callers always pass self.user(); confirm.
        page = Page(owner=self.user(), url=url)
        page.start_content_population()
        new_page = page

    if page.errors:
        # JSON requests get no rendered error page here.
        if not self.is_json():
            self._render_error(page)
    elif success and new_page:
        success(new_page)

    return new_page
def all_instances(self):
    """Return the result of Page.find_all for the user given by self.user()."""
    owner = self.user()
    return Page.find_all(owner)
def setUp(self):
    # Run the parent setUp, then delete every stored Page and Content so
    # each test starts from an empty datastore.
    from pagefeed.models import Page, Content
    super(CleanDBTest, self).setUp()
    for model in (Page, Content):
        db.delete(model.all())
def get(self):
    """Write a JSON list of to_json(page) for every page from Page.find_all."""
    pages = Page.find_all(self.user())
    serialised = [to_json(entry) for entry in pages]
    json.dump(serialised, self.response.out)
def get(self):
    """Write the content of the page found for the current user and url.

    Raises:
        HttpError: 404 when no page is found or its content is None.
    """
    page = Page.find(owner=self.user(), url=self.url())

    has_content = page is not None and page.content is not None
    if not has_content:
        raise HttpError(404, "could not find content for page: %s" % cgi.escape(self.url()))

    self.response.out.write(page.content)
def tearDown(self):
    # Delete every stored Page after each test. A plain loop is used because
    # delete() is called only for its side effect; the original built and
    # discarded a list of return values.
    for stored_page in Page.all():
        stored_page.delete()
def test_should_render_as_html(self):
    # The html property, stripped of surrounding whitespace, should equal
    # the markup assigned to _content.
    markup = '<body>b</body>'
    url = 'http://my_url/base_path/resource'
    subject = Page(url=url, owner=a_user)
    subject._content = markup

    rendered = subject.html.strip()
    self.assertEqual(rendered, '<body>b</body>')