Ejemplo n.º 1
0
    def put(self, source_name):
        """Create or update the Feed registered under a source name.

        source_name arrives URL-quoted and is unquoted before use. The
        request's "url" and "active" fields are read via read_json_fields.

        Responds 409 (and changes nothing further) when the named
        ArtifactSource already exists and is referenced by a feed;
        otherwise sets status 204 once the Feed has been stored.
        """
        helper = RequestHelper(self)

        source_name = urllib.unquote(source_name)
        success, values = read_json_fields(helper, "url", "active", logger=logging)
        if not success:
            # NOTE(review): presumably read_json_fields has already reported
            # the failure through helper -- confirm
            return
        url, active = values

        # a Feed must be sole owner of an ArtifactSource;
        # fails if source already exists and is already linked to a feed
        source = ArtifactSourceAccessor.get_by_name(source_name, return_none=True)
        if source:
            source_feed_key = Feed.get_by_source(source, keys_only=True, return_none=True)
            if source_feed_key:
                msg = "source '%s' is referenced by feed %s" % (source_name, source_feed_key.name())
                helper.error(409, msg)
                # stop here: without this return the conflicting request
                # would still fall through, touch the feed and answer 204
                return
        else:
            source = ArtifactSourceAccessor.create(source_name)

        # creates UrlResource if necessary
        resource = UrlResourceAccessor.get_by_url(url, return_none=True)
        if not resource:
            resource = UrlResourceAccessor.create(url)

        # create or update Feed
        feed = Feed.get_by_source_name(source_name, return_none=True)
        if feed:
            # NOTE(review): only the source and resource links are refreshed
            # here; "url" and "active" from the request are not written to an
            # existing feed -- confirm that is intended
            feed.artifact_source = source
            feed.url_resource = resource
            feed.put()
        else:
            Feed.create(source_name, artifact_source=source, url=url, url_resource=resource, active=bool(active))
        helper.set_status(204)
Ejemplo n.º 2
0
    def test_get_registers_appropriate_tasks(self):
        """The cron driver's get() adds one taskqueue task per active feed."""
        mocks = Mox()
        request, response = new_mock_request_response(mocks)

        _stub_taskqueue(mocks)
        mocks.StubOutWithMock(Feed, "find_active", use_mock_anything=True)

        def build_feed(index):
            # each fake feed owns a fake source named after its index
            name = "source-%i" % index
            return MockEntity(
                key_name="feed-%i" % index,
                artifact_source=MockEntity(key_name=name, name=name),
                url="hi")

        feed_indexes = xrange(5)
        active_query = MockQuery(feed_indexes, create_call=build_feed)
        Feed.find_active().AndReturn(active_query)

        # one queued task is expected for every feed in the query
        for _ in feed_indexes:
            taskqueue.add(name=IgnoreArg(), url=IgnoreArg())

        mocks.ReplayAll()
        handler = CronIngestDriverHandler()
        handler.initialize(request, response)
        handler.get()
        mocks.VerifyAll()
Ejemplo n.º 3
0
 def _test_post_no_user(self):
     """post() with no logged-in user ingests the feed with a None user.

     The leading underscore keeps test runners that collect by the
     ``test`` prefix from picking this method up.
     """
     mocks = Mox()

     request, response = new_mock_request_response(mocks)
     for owner, attribute in ((users, "get_current_user"),
                              (User, "__init__"),
                              (Feed, "get_by_source_name")):
         mocks.StubOutWithMock(owner, attribute, use_mock_anything=True)
     mocks.StubOutWithMock(model, "ingest_feed_entries")

     feed_source = "hi"
     api_username = Services.API_USER
     api_user = MockEntity(key_name=api_username, email=lambda: api_username)

     # passes auth (via cron)
     users.get_current_user().AndReturn(api_user)
     handler = IngestHandler()
     # second lookup finds no logged in user, so the API user is constructed
     users.get_current_user()
     User.__init__(api_username)
     handler.initialize(request, response)
     stored_feed = MockEntity(key_name=feed_source, url="no")
     Feed.get_by_source_name(feed_source, return_none=True).AndReturn(stored_feed)
     model.ingest_feed_entries(stored_feed, None, error_call=IgnoreArg()).AndReturn(())

     mocks.ReplayAll()
     handler.post(feed_source)
     mocks.VerifyAll()
Ejemplo n.º 4
0
    def post(self, **kw):
        """Recount artifacts for every ArtifactSource and report as JSON.

        For each source the counter is reset to the number of ArtifactInfo
        entries found. A source with no artifacts and no feed is deleted.
        The JSON written maps each source name to a dict with 'old' plus,
        where applicable, 'feed', 'deleted' and 'new'.
        """
        helper = RequestHelper(self)
        report = {}
        for source in ArtifactSource.all():
            artifacts = ArtifactInfo.find_by_source(source)
            current = sum(1 for _ in artifacts)

            source_counter = Counters.source_counter(source.name)
            entry = {'old': source_counter.count()}
            source_counter.set(current)

            # a source that is linked to a feed must not be deleted
            feed = Feed.get_by_source(source, return_none=True)
            if feed:
                entry['feed'] = feed.url

            if current:
                entry['new'] = current
            elif not feed:
                source.delete()
                entry['deleted'] = True

            report[source.name] = entry
        helper.write_json(report)
Ejemplo n.º 5
0
    def post(self, **kw):
        """Recount artifacts for every ArtifactSource and report as JSON.

        For each source the counter is reset to the number of ArtifactInfo
        entries found. A source with no artifacts and no feed is deleted.
        The JSON written maps each source name to a dict with 'old' plus,
        where applicable, 'feed', 'deleted' and 'new'.
        """
        helper = RequestHelper(self)
        report = {}
        for source in ArtifactSource.all():
            artifacts = ArtifactInfo.find_by_source(source)
            current = sum(1 for _ in artifacts)

            source_counter = Counters.source_counter(source.name)
            entry = {'old': source_counter.count()}
            source_counter.set(current)

            # a source that is linked to a feed must not be deleted
            feed = Feed.get_by_source(source, return_none=True)
            if feed:
                entry['feed'] = feed.url

            if current:
                entry['new'] = current
            elif not feed:
                source.delete()
                entry['deleted'] = True

            report[source.name] = entry
        helper.write_json(report)
Ejemplo n.º 6
0
    def test_post_handles_ingest_error(self):
        """An exception raised by the feed lookup is handled by post()."""
        mocks = Mox()
        request, response = new_mock_request_response(mocks)
        mocks.StubOutWithMock(Feed, "get_by_source_name")

        request.get("keep").AndReturn(None)
        source_name = "blah"
        lookup = Feed.get_by_source_name(source_name, return_none=True)
        lookup.AndRaise(Exception("real bad"))

        # no response.set_status(200) expectation is recorded

        mocks.ReplayAll()
        handler = CronIngestHandler()
        handler.initialize(request, response)

        def attempt_post():
            return handler.post(source_name)

        _assert_handles_error(attempt_post)
        mocks.VerifyAll()
Ejemplo n.º 7
0
    def test_post_handles_ingest_error(self):
        """An exception raised by the feed lookup is handled by post()."""
        mocks = Mox()
        request, response = new_mock_request_response(mocks)
        mocks.StubOutWithMock(Feed, "get_by_source_name")

        request.get("keep").AndReturn(None)
        source_name = "blah"
        lookup = Feed.get_by_source_name(source_name, return_none=True)
        lookup.AndRaise(Exception("real bad"))

        # no response.set_status(200) expectation is recorded

        mocks.ReplayAll()
        handler = CronIngestHandler()
        handler.initialize(request, response)

        def attempt_post():
            return handler.post(source_name)

        _assert_handles_error(attempt_post)
        mocks.VerifyAll()
Ejemplo n.º 8
0
 def get(self, source_name):
     """Write the feed for the (URL-quoted) source name as JSON, or 404."""
     helper = RequestHelper(self)
     feed = Feed.get_by_source_name(urllib.unquote(source_name), return_none=True)
     if feed:
         helper.write_json(build_feed_hash(feed))
     else:
         # no feed is registered under this source name
         helper.error(404)
Ejemplo n.º 9
0
 def delete(self, source_name):
     """Delete the feed for the (URL-quoted) source name.

     Responds 404 when no such feed exists, otherwise sets status 204.
     """
     helper = RequestHelper(self)
     target = Feed.get_by_source_name(urllib.unquote(source_name), return_none=True)
     if target:
         target.delete()
         helper.set_status(204)
     else:
         helper.error(404)
Ejemplo n.º 10
0
    def ingest(cls, handler, source_name):
        """Ingest the feed registered for a source, then prune old content.

        source_name arrives URL-quoted and is unquoted before the lookup.
        The optional "keep" request parameter is the count handed to
        ArtifactContent.delete_oldest_by_source (default 50). Responds 404
        through the request helper when no feed is registered for the source.

        Entries are ingested as the current user, or as Services.API_USER
        when nobody is logged in. Pruning runs in a ``finally`` clause, so
        it also happens when ingestion raises; the source counter is then
        decremented by the number of deleted keys.
        """
        helper = RequestHelper(handler)
        source_name = urllib.unquote(source_name)

        keep = handler.request.get("keep")
        keep = int(keep) if keep else 50

        # TODO: get from cache
        f = Feed.get_by_source_name(source_name, return_none=True)
        if not f:
            helper.error(404)
            return

        # NOTE(review): results is filled in below but never written to the
        # response or returned within this block -- confirm whether intended
        results = {}
        entries = []
        results['created'] = entries

        # TODO: use etag from previous ingest
        error_call = lambda entry, ex: logging.error(Exceptions.format_last())

        user = users.get_current_user()
        if not user:
            # there is no logged in user for cron requests
            user = User(Services.API_USER)

        # TODO: shouldn't I be deleting ArtifactContent instances also?
        def delete_info(c):
            # best-effort removal of the info attached to a content entity:
            # a failure is logged rather than silently dropped, and it must
            # not abort the pruning of the remaining entries
            try:
                i = c.info
                if i:
                    i.delete()
            except Exception:
                logging.error(Exceptions.format_last())

        try:
            for artifact_guid, entry, created in model.ingest_feed_entries(
                    f, user, error_call=error_call):
                entries.append({
                    "artifact-guid": artifact_guid,
                    "url": entry.link,
                    "title": entry.title,
                    "created": created
                })
        finally:
            # delete oldest feed entries
            deleted_key_names = ArtifactContent.delete_oldest_by_source(
                f.artifact_source, keep, pre_call=delete_info)

            results['deleted'] = deleted_key_names
            Counters.source_counter(f.artifact_source.name).decrement(
                len(deleted_key_names))
Ejemplo n.º 11
0
 def ingest(cls, handler, source_name):
     """Ingest the feed registered for a source, then prune old content.

     source_name arrives URL-quoted and is unquoted before the lookup.
     The optional "keep" request parameter is the count handed to
     ArtifactContent.delete_oldest_by_source (default 50). Responds 404
     through the request helper when no feed is registered for the source.

     Entries are ingested as the current user, or as Services.API_USER
     when nobody is logged in. Pruning runs in a ``finally`` clause, so
     it also happens when ingestion raises; the source counter is then
     decremented by the number of deleted keys.
     """
     helper = RequestHelper(handler)
     source_name = urllib.unquote(source_name)

     keep = handler.request.get("keep")
     keep = int(keep) if keep else 50

     # TODO: get from cache
     f = Feed.get_by_source_name(source_name, return_none=True)
     if not f:
         helper.error(404)
         return

     # NOTE(review): results is filled in below but never written to the
     # response or returned within this block -- confirm whether intended
     results = {}
     entries = []
     results['created'] = entries

     # TODO: use etag from previous ingest
     error_call = lambda entry, ex: logging.error(Exceptions.format_last())

     user = users.get_current_user()
     if not user:
         # there is no logged in user for cron requests
         user = User(Services.API_USER)

     # TODO: shouldn't I be deleting ArtifactContent instances also?
     def delete_info(c):
         # best-effort removal of the info attached to a content entity:
         # a failure is logged rather than silently dropped, and it must
         # not abort the pruning of the remaining entries
         try:
             i = c.info
             if i:
                 i.delete()
         except Exception:
             logging.error(Exceptions.format_last())

     try:
         for artifact_guid, entry, created in model.ingest_feed_entries(f, user, error_call=error_call):
             entries.append({
                 "artifact-guid": artifact_guid,
                 "url": entry.link,
                 "title": entry.title,
                 "created": created,
             })
     finally:
         # delete oldest feed entries
         deleted_key_names = ArtifactContent.delete_oldest_by_source(f.artifact_source, keep, pre_call=delete_info)

         results['deleted'] = deleted_key_names
         Counters.source_counter(f.artifact_source.name).decrement(len(deleted_key_names))
Ejemplo n.º 12
0
 def test_get_registers_appropriate_tasks(self):
     """The cron driver's get() adds one taskqueue task per active feed."""
     mocks = Mox()
     request, response = new_mock_request_response(mocks)

     _stub_taskqueue(mocks)
     mocks.StubOutWithMock(Feed, "find_active", use_mock_anything=True)

     def build_feed(index):
         # each fake feed owns a fake source named after its index
         name = "source-%i" % index
         return MockEntity(
             key_name="feed-%i" % index,
             artifact_source=MockEntity(key_name=name, name=name),
             url="hi")

     feed_indexes = xrange(5)
     active_query = MockQuery(feed_indexes, create_call=build_feed)
     Feed.find_active().AndReturn(active_query)

     # one queued task is expected for every feed in the query
     for _ in feed_indexes:
         taskqueue.add(name=IgnoreArg(), url=IgnoreArg())

     mocks.ReplayAll()
     handler = CronIngestDriverHandler()
     handler.initialize(request, response)
     handler.get()
     mocks.VerifyAll()
Ejemplo n.º 13
0
 def test_post_with_user(self):
     """post() with a logged-in user ingests as that user and prunes content."""
     mocks = Mox()

     request, response = new_mock_request_response(mocks)
     mocks.StubOutWithMock(users, "get_current_user", use_mock_anything=True)
     # pruning is expected on ArtifactContent (not ArtifactInfo)
     mocks.StubOutWithMock(ArtifactContent, "delete_oldest_by_source")
     mocks.StubOutWithMock(Counters, "source_counter")
     mocks.StubOutWithMock(Feed, "get_by_source_name", use_mock_anything=True)
     mocks.StubOutWithMock(model, "ingest_feed_entries")

     feed_source = "hi"
     api_username = Services.API_USER
     current_user = MockEntity(key_name=api_username, email=lambda: api_username)

     # the current user is looked up twice, and found both times
     users.get_current_user().AndReturn(current_user)
     handler = IngestHandler()
     users.get_current_user().AndReturn(current_user)
     handler.initialize(request, response)
     request.get("keep").AndReturn(None)

     # the source counter is decremented after pruning
     source_counter = mocks.CreateMock(Counter)
     Counters.source_counter(feed_source).AndReturn(source_counter)
     source_counter.decrement(IgnoreArg())

     artifact_source = MockEntity(key_name=feed_source, name=feed_source)
     stored_feed = MockEntity(
         key_name=feed_source, url="no", artifact_source=artifact_source)
     prune = ArtifactContent.delete_oldest_by_source(
         artifact_source, IgnoreArg(), pre_call=IgnoreArg())
     prune.AndReturn([])

     Feed.get_by_source_name(feed_source, return_none=True).AndReturn(stored_feed)
     model.ingest_feed_entries(
         stored_feed, current_user, error_call=IgnoreArg()).AndReturn(())

     mocks.ReplayAll()
     handler.post(feed_source)
     mocks.VerifyAll()
Ejemplo n.º 14
0
 def get(self):
     """ enqueues all active Feeds for ingest """
     active_feeds = Feed.find_active()
     if not active_feeds.count():
         logging.warn("no active feeds found")
         return

     # generates unique ids for task names
     ingest_id = dates.timestamp(separator="")

     for feed in active_feeds.fetch(1000):
         try:
             # tasks are scheduled per source name
             source_name = feed.artifact_source.name

             # characters matched by the sanitize regex become "-" in the task name
             safe_name = self._source_sanitize_regex.sub("-", source_name)
             task_name = "ingest-%s-%s" % (safe_name, ingest_id)

             # NOTE(review): the raw source name is interpolated into the URL
             # without quoting -- confirm names cannot contain URL-unsafe characters
             task_url = "/cron/ingest/%s" % source_name
             taskqueue.add(name=task_name, url=task_url)

             message = "queued ingest task %s for source '%s' (feed %s)" % (task_name, source_name, feed.url)
             logging.debug(message)
         except (taskqueue.TaskAlreadyExistsError, taskqueue.TombstonedTaskError):
             # a rejected task name is logged; the remaining feeds are still queued
             logging.warn(traceback.format_exc())
Ejemplo n.º 15
0
 def get_by_source_name(cls, source_name, **kw):
     """Delegate to Feed.get_by_source_name, forwarding source_name and any keyword arguments unchanged."""
     return Feed.get_by_source_name(source_name, **kw)
Ejemplo n.º 16
0
 def get_by_source_name(cls, source_name, **kw):
     """Delegate to Feed.get_by_source_name, forwarding source_name and any keyword arguments unchanged."""
     return Feed.get_by_source_name(source_name, **kw)
Ejemplo n.º 17
0
 def get(self):
     """Write up to 50 feeds, ordered by url, as a JSON list of feed hashes."""
     helper = RequestHelper(self)
     feeds = Feed.all().order('url').fetch(50)
     helper.write_json([build_feed_hash(feed) for feed in feeds])