def put(self, source_name):
    helper = RequestHelper(self)
    source_name = urllib.unquote(source_name)

    success, values = read_json_fields(helper, "url", "active", logger=logging)
    if not success:
        return
    url, active = values

    # a Feed must be sole owner of an ArtifactSource;
    # fails if source already exists and is already linked to a feed
    source = ArtifactSourceAccessor.get_by_name(source_name, return_none=True)
    if source:
        source_feed_key = Feed.get_by_source(source, keys_only=True, return_none=True)
        if source_feed_key:
            msg = "source '%s' is referenced by feed %s" % (source_name, source_feed_key.name())
            helper.error(409, msg)
            return
    else:
        source = ArtifactSourceAccessor.create(source_name)

    # creates UrlResource if necessary
    resource = UrlResourceAccessor.get_by_url(url, return_none=True)
    if not resource:
        resource = UrlResourceAccessor.create(url)

    # create or update Feed
    feed = Feed.get_by_source_name(source_name, return_none=True)
    if feed:
        feed.artifact_source = source
        feed.url_resource = resource
        feed.put()
    else:
        Feed.create(source_name,
                    artifact_source=source,
                    url=url,
                    url_resource=resource,
                    active=bool(active))

    helper.set_status(204)
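# A minimal sketch of the read_json_fields helper used by put() above. The real
# implementation is not shown in this listing; this is an assumption about its
# behavior: parse the JSON request body, pull out the named fields in order, and
# report a 400 through the RequestHelper if the body is invalid or a field is
# missing. It also assumes RequestHelper keeps a reference to its handler.
import json      # simplejson on older runtimes
import logging

def read_json_fields(helper, *field_names, **kw):
    logger = kw.get("logger", logging)
    try:
        body = json.loads(helper.handler.request.body)
    except ValueError:
        logger.error("request body is not valid JSON")
        helper.error(400, "invalid JSON body")
        return False, None
    missing = [name for name in field_names if name not in body]
    if missing:
        msg = "missing fields: %s" % ", ".join(missing)
        logger.error(msg)
        helper.error(400, msg)
        return False, None
    # values come back in the same order as the requested field names
    return True, tuple(body[name] for name in field_names)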
def test_get_registers_appropriate_tasks(self):
    moxer = Mox()
    request, response = new_mock_request_response(moxer)
    _stub_taskqueue(moxer)
    moxer.StubOutWithMock(Feed, "find_active", use_mock_anything=True)

    def create_call(i):
        source_name = "source-%i" % i
        source = MockEntity(key_name=source_name, name=source_name)
        return MockEntity(key_name="feed-%i" % i, artifact_source=source, url="hi")

    q_range = xrange(0, 5)
    Feed.find_active().AndReturn(MockQuery(q_range, create_call=create_call))

    # expects queued tasks for each feed
    for i in q_range:
        taskqueue.add(name=IgnoreArg(), url=IgnoreArg())

    moxer.ReplayAll()
    handler = CronIngestDriverHandler()
    handler.initialize(request, response)
    handler.get()
    moxer.VerifyAll()
def _test_post_no_user(self):
    moxer = Mox()
    request, response = new_mock_request_response(moxer)
    moxer.StubOutWithMock(users, "get_current_user", use_mock_anything=True)
    moxer.StubOutWithMock(User, "__init__", use_mock_anything=True)
    moxer.StubOutWithMock(Feed, "get_by_source_name", use_mock_anything=True)
    moxer.StubOutWithMock(model, "ingest_feed_entries")

    source_name = "hi"
    username = Services.API_USER
    user = MockEntity(key_name=username, email=lambda: username)

    # passes auth (via cron)
    users.get_current_user().AndReturn(user)
    handler = IngestHandler()

    # no logged in user
    users.get_current_user()
    User.__init__(username)

    handler.initialize(request, response)
    feed = MockEntity(key_name=source_name, url="no")
    Feed.get_by_source_name(source_name, return_none=True).AndReturn(feed)
    model.ingest_feed_entries(feed, None, error_call=IgnoreArg()).AndReturn(())

    moxer.ReplayAll()
    handler.post(source_name)
    moxer.VerifyAll()
def post(self, **kw):
    helper = RequestHelper(self)
    results = {}
    source_q = ArtifactSource.all()
    for s in source_q:
        artifact_q = ArtifactInfo.find_by_source(s)
        count = len([a for a in artifact_q])
        counter = Counters.source_counter(s.name)
        old_count = counter.count()
        counter.set(count)
        source_result = {'old': old_count}

        # if source is linked to a feed, I can't delete it
        feed = Feed.get_by_source(s, return_none=True)
        if feed:
            source_result['feed'] = feed.url
        if not count and not feed:
            s.delete()
            source_result['deleted'] = True
        if count:
            source_result['new'] = count

        results[s.name] = source_result

    helper.write_json(results)
def test_post_handles_ingest_error(self):
    m = Mox()
    request, response = new_mock_request_response(m)
    m.StubOutWithMock(Feed, "get_by_source_name")

    request.get("keep").AndReturn(None)
    feed_name = "blah"
    Feed.get_by_source_name(feed_name, return_none=True).AndRaise(Exception("real bad"))
    # response.set_status(200)

    m.ReplayAll()
    handler = CronIngestHandler()
    handler.initialize(request, response)
    _assert_handles_error(lambda: handler.post(feed_name))
    m.VerifyAll()
def get(self, source_name):
    helper = RequestHelper(self)
    source_name = urllib.unquote(source_name)
    f = Feed.get_by_source_name(source_name, return_none=True)
    if not f:
        helper.error(404)
        return
    helper.write_json(build_feed_hash(f))
def delete(self, source_name):
    helper = RequestHelper(self)
    source_name = urllib.unquote(source_name)
    feed = Feed.get_by_source_name(source_name, return_none=True)
    if not feed:
        helper.error(404)
        return
    feed.delete()
    helper.set_status(204)
def ingest(cls, handler, source_name):
    helper = RequestHelper(handler)
    source_name = urllib.unquote(source_name)

    keep = handler.request.get("keep")
    if keep:
        keep = int(keep)
    else:
        keep = 50  # TODO: get from cache

    f = Feed.get_by_source_name(source_name, return_none=True)
    if not f:
        helper.error(404)
        return

    results = {}
    entries = []
    results['created'] = entries

    # TODO: use etag from previous ingest
    error_call = lambda entry, ex: logging.error(Exceptions.format_last())

    user = users.get_current_user()
    if not user:
        # there is no logged in user for cron requests
        user = User(Services.API_USER)

    try:
        for artifact_guid, entry, created in model.ingest_feed_entries(f, user, error_call=error_call):
            entries.append({"artifact-guid": artifact_guid,
                            "url": entry.link,
                            "title": entry.title,
                            "created": created})
    finally:
        # delete oldest feed entries
        # TODO: shouldn't I be deleting ArtifactContent instances also?
        def delete_info(c):
            try:
                i = c.info
                if i:
                    i.delete()
            except Exception, e:
                pass

        deleted_key_names = ArtifactContent.delete_oldest_by_source(f.artifact_source, keep, pre_call=delete_info)
        results['deleted'] = deleted_key_names
        Counters.source_counter(f.artifact_source.name).decrement(len(deleted_key_names))
def test_post_with_user(self):
    moxer = Mox()
    request, response = new_mock_request_response(moxer)
    moxer.StubOutWithMock(users, "get_current_user", use_mock_anything=True)
    # moxer.StubOutWithMock(ArtifactInfo, "delete_oldest_by_source", use_mock_anything=True)
    moxer.StubOutWithMock(ArtifactContent, "delete_oldest_by_source")
    moxer.StubOutWithMock(Counters, "source_counter")
    moxer.StubOutWithMock(Feed, "get_by_source_name", use_mock_anything=True)
    moxer.StubOutWithMock(model, "ingest_feed_entries")

    source_name = "hi"
    username = Services.API_USER
    user = MockEntity(key_name=username, email=lambda: username)

    users.get_current_user().AndReturn(user)
    handler = IngestHandler()
    users.get_current_user().AndReturn(user)
    handler.initialize(request, response)

    request.get("keep").AndReturn(None)
    counter = moxer.CreateMock(Counter)
    Counters.source_counter(source_name).AndReturn(counter)
    counter.decrement(IgnoreArg())

    source = MockEntity(key_name=source_name, name=source_name)
    feed = MockEntity(key_name=source_name, url="no", artifact_source=source)
    # ArtifactInfo.delete_oldest_by_source(source, IgnoreArg()).AndReturn([])
    ArtifactContent.delete_oldest_by_source(source, IgnoreArg(), pre_call=IgnoreArg()).AndReturn([])
    Feed.get_by_source_name(source_name, return_none=True).AndReturn(feed)
    model.ingest_feed_entries(feed, user, error_call=IgnoreArg()).AndReturn(())

    moxer.ReplayAll()
    handler.post(source_name)
    moxer.VerifyAll()
def get(self):
    """ enqueues all active Feeds for ingest """
    # find all active feeds
    q = Feed.find_active()
    if not q.count():
        logging.warn("no active feeds found")
        return

    # generates a unique id for task names
    ingest_id = dates.timestamp(separator="")

    for f in q.fetch(1000):
        try:
            # schedule tasks to ingest by source name
            source_name = f.artifact_source.name
            # replace invalid chars in the source name
            normalized_source_name = self._source_sanitize_regex.sub("-", source_name)
            task_name = "ingest-%s-%s" % (normalized_source_name, ingest_id)
            taskqueue.add(name=task_name, url="/cron/ingest/%s" % source_name)
            logging.debug("queued ingest task %s for source '%s' (feed %s)" % (task_name, source_name, f.url))
        except (taskqueue.TaskAlreadyExistsError, taskqueue.TombstonedTaskError):
            logging.warn(traceback.format_exc())
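# The _source_sanitize_regex used above is not defined in this listing. A plausible
# definition (an assumption, not the original): App Engine task names may only
# contain letters, digits, hyphens, and underscores, so everything else in the
# source name is replaced with "-" before it becomes part of the task name.
import re
_source_sanitize_regex = re.compile(r"[^a-zA-Z0-9_-]")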
def get_by_source_name(cls, source_name, **kw):
    return Feed.get_by_source_name(source_name, **kw)
def get(self):
    helper = RequestHelper(self)
    results = []
    for f in Feed.all().order('url').fetch(50):
        results.append(build_feed_hash(f))
    helper.write_json(results)
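# build_feed_hash is referenced by both feed GET handlers but not defined in this
# listing. A minimal sketch, assuming the hash mirrors the fields put() accepts
# (source name, url, active); the real field names may differ.
def build_feed_hash(feed):
    return {"source": feed.artifact_source.name,
            "url": feed.url,
            "active": feed.active}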