def test_create(self):
    """Records the calls a successful create() is expected to make, replays them, and verifies."""
    save_kw = self.__keywords()
    source_name = save_kw['source']
    content_type = save_kw['content_type']
    body = save_kw['body']

    # every model/counter entry point recorded below is stubbed first
    stub_targets = (
        (ArtifactInfo, "all"),
        (ArtifactSource, "get_or_create"),
        (Counters, "source_counter"),
        (ArtifactInfo, "create"),
        (ArtifactContent, "create"),
    )
    for owner, attr in stub_targets:
        self.moxer.StubOutWithMock(owner, attr, use_mock_anything=True)

    source = MockEntity(key_name=source_name)
    ArtifactInfo.all(keys_only=True).AndReturn(MockQuery(None, keys_only=True))
    ArtifactSource.get_or_create(source_name).AndReturn(source)

    counter = self.moxer.CreateMockAnything()
    Counters.source_counter(source_name).AndReturn(counter)
    counter.increment()

    # TODO: I wish I could ignore keywords
    md5 = ArtifactAccessor._content_md5(source_name, content_type, body)
    info_key = MockKey(name=self.test_id)
    ArtifactInfo.create(
        source=source,
        source_name=source_name,
        content_type=content_type,
        content_md5=md5).AndReturn(info_key)
    ArtifactContent.create(
        info_key.name(),
        source=source,
        source_name=source_name,
        info=info_key,
        body=body).AndReturn(MockKey(name=self.test_id))

    self.moxer.ReplayAll()
    info, content, created_source = ArtifactAccessor.create(**save_kw)
    print('info:%s, content:%s, source:%s' % (info, content, created_source))
    self.moxer.VerifyAll()
def _test_delete(self):
    """Records get_by_guid lookups that return entities, then runs delete() and verifies.

    NOTE: the leading underscore means name-based unittest discovery will not collect this.
    """
    guid = 'blah'
    for model in (ArtifactInfo, ArtifactContent):
        self.moxer.StubOutWithMock(model, "get_by_guid", use_mock_anything=True)

    # both lookups are recorded as returning an entity keyed by the guid
    ArtifactInfo.get_by_guid(guid).AndReturn(
        MockEntity(MockKey(name=guid)))
    ArtifactContent.get_by_guid(guid).AndReturn(
        MockEntity(MockKey(name=guid)))

    self.moxer.ReplayAll()
    ArtifactAccessor.delete(guid)
    self.moxer.VerifyAll()
def test_create_duplicate(self):
    """Expects create() to raise DuplicateDataException when the stubbed query is non-empty."""
    self.moxer.StubOutWithMock(ArtifactInfo, "all", use_mock_anything=True)
    existing = MockQuery(range(1), keys_only=True)
    ArtifactInfo.all(keys_only=True).AndReturn(existing)
    self.moxer.ReplayAll()

    try:
        ArtifactAccessor.create(**self.__keywords())
    except DuplicateDataException:
        # expected outcome
        return
    self.fail("exception expected")
def test_create_duplicate(self):
    """Expects create() to raise DuplicateDataException when the stubbed query is non-empty."""
    self.moxer.StubOutWithMock(ArtifactInfo, "all", use_mock_anything=True)
    one_hit_query = MockQuery(range(1), keys_only=True)
    ArtifactInfo.all(keys_only=True).AndReturn(one_hit_query)
    self.moxer.ReplayAll()

    try:
        ArtifactAccessor.create(**self.__keywords())
    except DuplicateDataException:
        # expected outcome
        return
    self.fail("exception expected")
def test_delete_nonexistent(self):
    """Expects delete() to raise NotFoundException when neither lookup is given a return value."""
    guid = 'blah'
    for model in (ArtifactInfo, ArtifactContent):
        self.moxer.StubOutWithMock(model, "get_by_guid", use_mock_anything=True)

    # recorded without AndReturn(), so no entity is handed back on replay
    ArtifactInfo.get_by_guid(guid)
    ArtifactContent.get_by_guid(guid)
    self.moxer.ReplayAll()

    try:
        ArtifactAccessor.delete(guid)
    except NotFoundException:
        # expected outcome
        return
    self.fail("exception expected")
def test_create(self):
    """Records the calls a successful create() is expected to make, replays them, and verifies."""
    save_kw = self.__keywords()
    source_name = save_kw['source']
    content_type = save_kw['content_type']
    body = save_kw['body']

    # every model/counter entry point recorded below is stubbed first
    stub_targets = (
        (ArtifactInfo, "all"),
        (ArtifactSource, "get_or_create"),
        (Counters, "source_counter"),
        (ArtifactInfo, "create"),
        (ArtifactContent, "create"),
    )
    for owner, attr in stub_targets:
        self.moxer.StubOutWithMock(owner, attr, use_mock_anything=True)

    source = MockEntity(key_name=source_name)
    ArtifactInfo.all(keys_only=True).AndReturn(MockQuery(None, keys_only=True))
    ArtifactSource.get_or_create(source_name).AndReturn(source)

    counter = self.moxer.CreateMockAnything()
    Counters.source_counter(source_name).AndReturn(counter)
    counter.increment()

    # TODO: I wish I could ignore keywords
    md5 = ArtifactAccessor._content_md5(source_name, content_type, body)
    info_key = MockKey(name=self.test_id)
    ArtifactInfo.create(
        source=source,
        source_name=source_name,
        content_type=content_type,
        content_md5=md5).AndReturn(info_key)
    ArtifactContent.create(
        info_key.name(),
        source=source,
        source_name=source_name,
        info=info_key,
        body=body).AndReturn(MockKey(name=self.test_id))

    self.moxer.ReplayAll()
    info, content, created_source = ArtifactAccessor.create(**save_kw)
    print('info:%s, content:%s, source:%s' % (info, content, created_source))
    self.moxer.VerifyAll()
def put(cls, rhandler, guid, **kw):
    """
    Removes every declared ArtifactInfo property from the artifact stored under
    guid and saves it; reports a 404 through the request helper if no artifact
    is found.
    """
    helper = RequestHelper(rhandler)
    artifact = ArtifactInfo.get_by_guid(guid)
    if artifact:
        # removes existing properties
        for prop_name in ArtifactInfo.properties().keys():
            delattr(artifact, prop_name)
        # save artifact
        ArtifactInfo.save(artifact)
    else:
        helper.error(404)
def delete_by_name(cls, source_name):
    """
    Deletes the named ArtifactSource along with the artifacts found for it.

    raises:
      NotFoundException - if no ArtifactSource is found for source_name
      ConflictingDataException - if a Feed still references the source
    """
    source = ArtifactSource.get_by_name(source_name)
    logging.debug("delete_by_name source: %s" % source)
    if not source:
        raise NotFoundException("ArtifactSource %s" % source_name)

    # checks for feeds linked to source
    feed = FeedAccessor.get_by_source_name(source_name, return_none=True)
    if feed:
        raise ConflictingDataException(
            "ArtifactSource '%s' is referenced by Feed '%s'" % (source_name, feed.url))

    # finds and deletes artifacts for source
    info_keys = ArtifactInfo.find_by_source(source, keys_only=True)
    content_keys = ArtifactContent.find_by_source(source)

    # zips keys to delete info/content pairs back-to-back
    for artifact_keys in zip(info_keys, content_keys):
        db.delete(artifact_keys)

    # deletes extras if info/content sizes don't match
    # (this would be a data bug somewhere)
    content_len = content_keys.count()
    info_len = info_keys.count()
    if content_len > info_len:
        # more contents than infos: the unpaired contents start at index info_len
        db.delete(content_keys[info_len:])
    elif info_len > content_len:
        # more infos than contents: the unpaired infos start at index content_len
        db.delete(info_keys[content_len:])

    # deletes source
    db.delete(source)
def create(cls, **kw):
    """
    Creates a new artifact unless one with the same content hash already exists.

    keywords:
      source
      content_type
      body
    returns:
      tuple: (ArtifactInfo key, ArtifactContent key, ArtifactSource key)
    raises:
      IllegalArgumentException - if no keywords, or source/content_type is missing
      DuplicateDataException - if artifact already exists
    """
    if not kw:
        raise IllegalArgumentException("keywords must be provided")

    source_name = kw.pop("source", None)
    content_type = kw.get("content_type")
    if not source_name:
        raise IllegalArgumentException("source keyword must be provided.")
    if not content_type:
        raise IllegalArgumentException("content_type keyword must be provided.")

    # "body" is popped because it can't be passed as a keyword to ArtifactInfo.create()
    body = kw.pop("body", None)

    # the content hash is what identifies a duplicate
    digest = cls._content_md5(source_name, content_type, body)
    duplicate_query = ArtifactInfo.find_by_content_md5(digest, keys_only=True)
    existing_key = duplicate_query.get()
    if existing_key:
        raise DuplicateDataException("artifact %s" % (existing_key.name()))

    return cls._create(source_name, body, digest, **kw)
def post(self, **kw):
    """
    Recounts the ArtifactInfo records of every ArtifactSource, stores the new
    count in the source counter, deletes sources that have no artifacts and no
    feed, and writes a per-source summary as JSON.
    """
    helper = RequestHelper(self)
    results = {}

    for s in ArtifactSource.all():
        count = sum(1 for _ in ArtifactInfo.find_by_source(s))

        counter = Counters.source_counter(s.name)
        summary = {'old': counter.count()}
        counter.set(count)

        # if source is linked to a feed, I can't delete it
        feed = Feed.get_by_source(s, return_none=True)
        if feed:
            summary['feed'] = feed.url
        elif not count:
            s.delete()
            summary['deleted'] = True

        if count:
            summary['new'] = count

        results[s.name] = summary

    helper.write_json(results)
def post(self, **kw):
    """
    Recounts the ArtifactInfo records of every ArtifactSource, stores the new
    count in the source counter, deletes sources that have no artifacts and no
    feed, and writes a per-source summary as JSON.
    """
    helper = RequestHelper(self)
    results = {}

    for s in ArtifactSource.all():
        count = sum(1 for _ in ArtifactInfo.find_by_source(s))

        counter = Counters.source_counter(s.name)
        source_summary = {'old': counter.count()}
        counter.set(count)

        # if source is linked to a feed, I can't delete it
        feed = Feed.get_by_source(s, return_none=True)
        if feed:
            source_summary['feed'] = feed.url
        elif not count:
            s.delete()
            source_summary['deleted'] = True

        if count:
            source_summary['new'] = count

        results[s.name] = source_summary

    helper.write_json(results)
def find_or_create(cls, **kw):
    """
    Looks up an artifact by its content hash and creates it when none is found.

    returns:
      tuple: (ArtifactInfo key, ArtifactContent key, ArtifactSource key, created)
    raises:
      IllegalArgumentException - if no keywords, or source/content_type is missing
    """
    if not kw:
        raise IllegalArgumentException("keywords must be provided")

    source_name = kw.pop("source", None)
    content_type = kw.get("content_type")
    if not source_name:
        raise IllegalArgumentException("source keyword must be provided.")
    if not content_type:
        raise IllegalArgumentException("content_type keyword must be provided.")

    # "body" is popped because it can't be passed as a keyword to ArtifactInfo.create()
    body = kw.pop("body", None)

    # the content hash is what identifies an already-saved artifact
    digest = cls._content_md5(source_name, content_type, body)
    existing = ArtifactInfo.find_by_content_md5(digest).get()

    if not existing:
        info_key, content_key, source_key = cls._create(
            source_name, body, digest, **kw)
        return (info_key, content_key, source_key, True)

    info_key = existing.key()
    content_key = ArtifactContent.get_by_guid(existing.guid).key()
    source_key = existing.source.key()
    return (info_key, content_key, source_key, False)
def delete_by_name(cls, source_name):
    """
    Deletes the named ArtifactSource along with the artifacts found for it.

    raises:
      NotFoundException - if no ArtifactSource is found for source_name
      ConflictingDataException - if a Feed still references the source
    """
    source = ArtifactSource.get_by_name(source_name)
    logging.debug("delete_by_name source: %s" % source)
    if not source:
        raise NotFoundException('ArtifactSource %s' % source_name)

    # checks for feeds linked to source
    feed = FeedAccessor.get_by_source_name(source_name, return_none=True)
    if feed:
        raise ConflictingDataException(
            "ArtifactSource '%s' is referenced by Feed '%s'" % (source_name, feed.url))

    # finds and deletes artifacts for source
    info_keys = ArtifactInfo.find_by_source(source, keys_only=True)
    content_keys = ArtifactContent.find_by_source(source)

    # zips keys to delete info/content pairs back-to-back
    for artifact_keys in zip(info_keys, content_keys):
        db.delete(artifact_keys)

    # deletes extras if info/content sizes don't match
    # (this would be a data bug somewhere)
    content_len = content_keys.count()
    info_len = info_keys.count()
    if content_len > info_len:
        # more contents than infos: the unpaired contents start at index info_len
        db.delete(content_keys[info_len:])
    elif info_len > content_len:
        # more infos than contents: the unpaired infos start at index content_len
        db.delete(info_keys[content_len:])

    # deletes source
    db.delete(source)
def find_or_create(cls, **kw):
    """
    Looks up an artifact by its content hash and creates it when none is found.

    returns:
      tuple: (ArtifactInfo key, ArtifactContent key, ArtifactSource key, created)
    raises:
      IllegalArgumentException - if no keywords, or source/content_type is missing
    """
    if not kw:
        raise IllegalArgumentException("keywords must be provided")

    source_name = kw.pop("source", None)
    content_type = kw.get("content_type")
    if not source_name:
        raise IllegalArgumentException("source keyword must be provided.")
    if not content_type:
        raise IllegalArgumentException("content_type keyword must be provided.")

    # "body" is popped because it can't be passed as a keyword to ArtifactInfo.create()
    body = kw.pop("body", None)

    # the content hash is what identifies an already-saved artifact
    digest = cls._content_md5(source_name, content_type, body)
    match = ArtifactInfo.find_by_content_md5(digest).get()

    if not match:
        info_key, content_key, source_key = cls._create(
            source_name, body, digest, **kw)
        return (info_key, content_key, source_key, True)

    info_key = match.key()
    content_key = ArtifactContent.get_by_guid(match.guid).key()
    source_key = match.source.key()
    return (info_key, content_key, source_key, False)
def get(self, **kw):
    """
    Searches ArtifactContent for the "q" request parameter and writes the
    result as JSON.

    request parameters:
      q   - search text; a 400 is reported when it is missing
      o   - output mode: "short" writes only a count, "id" writes a count and
            the matching guids, anything else writes up to 10 artifact hashes
      max - cap on the ids fetched in "id" mode; the default of -1 fetches up
            to 1000. A non-integer value is reported as a 400.
    """
    helper = RequestHelper(self)
    q = self.request.get("q", None)
    output = self.request.get("o", None)

    if not q:
        helper.error(400, "q not provided.")
        return

    # a non-numeric "max" is a client error, not a server failure
    try:
        max_results = int(self.request.get("max", -1))
    except ValueError:
        helper.error(400, "max must be an integer.")
        return

    q_results = ArtifactContent.all().search(q)
    if output == "short":
        json_results = {"count": q_results.count()}
    elif output == "id":
        # count once and reuse it for the cap comparison
        total = q_results.count()
        count = max_results if 0 < max_results < total else total
        fetch_limit = 1000 if max_results == -1 else max_results
        json_results = {
            "count": count,
            "ids": [c.guid for c in q_results.fetch(fetch_limit)],
        }
    else:
        json_results = []
        if q_results.count():
            for content in q_results.fetch(10):
                info = ArtifactInfo.get_by_guid(content.guid)
                json_results.append(ArtifactsHelper.artifact_to_hash(info, content))

    helper.write_json(json_results)
def _test_delete(self):
    """Records get_by_guid lookups that return entities, then runs delete() and verifies.

    NOTE: the leading underscore means name-based unittest discovery will not collect this.
    """
    guid = 'blah'
    for model in (ArtifactInfo, ArtifactContent):
        self.moxer.StubOutWithMock(model, "get_by_guid", use_mock_anything=True)

    # both lookups are recorded as returning an entity keyed by the guid
    ArtifactInfo.get_by_guid(guid).AndReturn(
        MockEntity(MockKey(name=guid)))
    ArtifactContent.get_by_guid(guid).AndReturn(
        MockEntity(MockKey(name=guid)))

    self.moxer.ReplayAll()
    ArtifactAccessor.delete(guid)
    self.moxer.VerifyAll()
def get(cls, rhandler, guid, **kw):
    """
    Writes the artifact stored under guid as JSON; reports a 404 through the
    request helper unless both the info and the content record are found.
    """
    helper = RequestHelper(rhandler)
    info = ArtifactInfo.get_by_guid(guid)
    content = ArtifactContent.get_by_guid(guid)

    if not (info and content):
        helper.error(404)
        return

    helper.write_json(ArtifactsHelper.artifact_to_hash(info, content))
def test_delete_by_name_deletes_source_with_no_referencing_feed(self):
    """
    Expects delete_by_name to delete the source, followed by one
    memcache.delete call, when no feed is returned and both artifact
    queries are empty.
    """
    stub_targets = (
        (ArtifactSource, "get_by_name"),
        (FeedAccessor, "get_by_source_name"),
        (ArtifactInfo, "find_by_source"),
        (ArtifactContent, "find_by_source"),
        (db, "delete"),
        (memcache, "delete"),
    )
    for owner, attr in stub_targets:
        self.m.StubOutWithMock(owner, attr)

    name = "mhawthorne"
    source = MockEntity(key_name=name)

    ArtifactSource.get_by_name(name).AndReturn(source)
    # recorded without AndReturn(): no feed references the source
    FeedAccessor.get_by_source_name(name, return_none=True)
    ArtifactInfo.find_by_source(source, keys_only=True).AndReturn(
        MockQuery(range(0, 0)))
    ArtifactContent.find_by_source(source).AndReturn(
        MockQuery(range(0, 0)))
    db.delete(source)
    memcache.delete(IsA(str)).AndReturn(1)

    self.m.ReplayAll()
    ArtifactSourceAccessor.delete_by_name(name)
    self.m.VerifyAll()
def get(self, **kw):
    """
    Writes a JSON list of artifact hashes ordered by "-modified", paged by the
    "start" (default 0) and "count" (default 10) request parameters.
    """
    helper = RequestHelper(self)
    offset = int(self.request.get("start", 0))
    limit = int(self.request.get("count", 10))

    query = ArtifactInfo.all().order("-modified")
    hashes = []
    if query.count():
        for info in query.fetch(limit, offset):
            content = ArtifactAccessor.get_content_by_guid(info.guid)
            hashes.append(ArtifactsHelper.artifact_to_hash(info, content))

    helper.write_json(hashes)
def test_delete_by_name_deletes_source_with_no_referencing_feed(self):
    """
    Expects delete_by_name to delete the source, followed by one
    memcache.delete call, when no feed is returned and both artifact
    queries are empty.
    """
    stub_targets = (
        (ArtifactSource, "get_by_name"),
        (FeedAccessor, "get_by_source_name"),
        (ArtifactInfo, "find_by_source"),
        (ArtifactContent, "find_by_source"),
        (db, "delete"),
        (memcache, "delete"),
    )
    for owner, attr in stub_targets:
        self.m.StubOutWithMock(owner, attr)

    name = "mhawthorne"
    source = MockEntity(key_name=name)

    ArtifactSource.get_by_name(name).AndReturn(source)
    # recorded without AndReturn(): no feed references the source
    FeedAccessor.get_by_source_name(name, return_none=True)
    ArtifactInfo.find_by_source(source, keys_only=True).AndReturn(
        MockQuery(range(0, 0)))
    ArtifactContent.find_by_source(source).AndReturn(
        MockQuery(range(0, 0)))
    db.delete(source)
    memcache.delete(IsA(str)).AndReturn(1)

    self.m.ReplayAll()
    ArtifactSourceAccessor.delete_by_name(name)
    self.m.VerifyAll()
def _create(cls, source_name, body, content_md5, **kw):
    """
    Saves the source (if new), the ArtifactInfo and the ArtifactContent, then
    increments the source counter.

    returns:
      tuple: (ArtifactInfo key, ArtifactContent key, ArtifactSource key)
    """
    # saves source, if unique
    source_key = ArtifactSource.get_or_create(source_name)

    # saves ArtifactInfo
    info_key = ArtifactInfo.create(
        content_md5=content_md5,
        source=source_key,
        source_name=source_name,
        **kw)

    # saves ArtifactContent under the name of the info key
    content_kw = dict(
        body=body,
        source=source_key,
        source_name=source_name,
        info=info_key)
    content_key = ArtifactContent.create(info_key.name(), **content_kw)

    # bump source counter
    # it's important to do this AFTER the artifacts are saved
    source_counter = Counters.source_counter(source_name)
    source_counter.increment()

    return (info_key, content_key, source_key)
def delete(cls, guid):
    """
    Deletes the ArtifactInfo and ArtifactContent records stored under guid.

    When only one of the two records exists, the inconsistency is logged and
    the record that was found is still deleted.

    raises:
      NotFoundException - if neither record is found
    """
    logger = LoggerFactory.logger(cls.__name__)

    a_info = ArtifactInfo.get_by_guid(guid)
    a_content_key = ArtifactContent.get_by_guid(guid)

    if not (a_info or a_content_key):
        # neither record found
        raise NotFoundException("artifact %s" % guid)
    elif not (a_info and a_content_key):
        # one record found; one missing
        # a_info may be the missing one, so its key name can't be read unconditionally
        info_name = a_info.key().name() if a_info else None
        logger.warn("artifact %s; missing data; info=%s; content=%s" %
                    (guid, info_name, a_content_key))

    # I delete what I can
    keys = [record for record in (a_info, a_content_key) if record]
    db.delete(keys)

    # decrease source counter
    # the source name is read from the info record, so there is nothing to
    # decrement against when only the content record existed
    if a_info:
        Counters.source_counter(a_info.source.name).decrement()
class ArtifactBulkDeleteHandler(webapp.RequestHandler):
    """
    I wanted this to be in the DELETE method for ArtifactsHandler,
    but a client limitation currently prevents it
    """

    def post(self):
        """
        Deletes the ArtifactContent and ArtifactInfo instances whose key names
        are listed under "ids" in the URL-quoted JSON request body.

        Responds 400 when the body is missing, is not valid JSON, or carries
        no "ids"; responds 204 once the deletes have been issued.
        """
        helper = RequestHelper(self)

        json_body = self.request.body
        if not json_body:
            helper.error(400, "body required")
            return

        decoded_body = urllib.unquote(json_body)
        try:
            body_hash = json.loads(decoded_body)
        except json.JSONDecodeError:
            msg = "malformed json: %s" % decoded_body
            helper.error(400, msg)
            logging.info(msg)
            return

        # valid JSON that is not an object (e.g. a bare list) has no .get();
        # treat it the same as an object without "ids"
        ids = body_hash.get("ids", None) if isinstance(body_hash, dict) else None
        if not ids:
            msg = "no 'ids' field provided in JSON"
            helper.error(400, msg)
            logging.info(msg)
            return

        logging.info("deleting %s artifact(s)" % len(ids))

        # ids that match nothing come back as None and are skipped
        contents = [c for c in ArtifactContent.get_by_key_name(ids) if c is not None]
        logging.info("deleting %s ArtifactContent instances" % len(contents))
        db.delete(contents)

        infos = [i for i in ArtifactInfo.get_by_key_name(ids) if i is not None]
        logging.info("deleting %s ArtifactInfo instances" % len(infos))
        db.delete(infos)

        helper.set_status(204)
def _create(cls, source_name, body, content_md5, **kw):
    """
    Saves the source (if new), the ArtifactInfo and the ArtifactContent, then
    increments the source counter.

    returns:
      tuple: (ArtifactInfo key, ArtifactContent key, ArtifactSource key)
    """
    # saves source, if unique
    source_key = ArtifactSource.get_or_create(source_name)

    # saves ArtifactInfo
    info_key = ArtifactInfo.create(
        content_md5=content_md5,
        source=source_key,
        source_name=source_name,
        **kw)

    # saves ArtifactContent under the name of the info key
    content_kw = dict(
        body=body,
        source=source_key,
        source_name=source_name,
        info=info_key)
    content_key = ArtifactContent.create(info_key.name(), **content_kw)

    # bump source counter
    # it's important to do this AFTER the artifacts are saved
    source_counter = Counters.source_counter(source_name)
    source_counter.increment()

    return (info_key, content_key, source_key)
def create(cls, **kw):
    """
    Creates a new artifact unless one with the same content hash already exists.

    keywords:
      source
      content_type
      body
    returns:
      tuple: (ArtifactInfo key, ArtifactContent key, ArtifactSource key)
    raises:
      IllegalArgumentException - if no keywords, or source/content_type is missing
      DuplicateDataException - if artifact already exists
    """
    if not kw:
        raise IllegalArgumentException("keywords must be provided")

    source_name = kw.pop("source", None)
    content_type = kw.get("content_type")
    if not source_name:
        raise IllegalArgumentException("source keyword must be provided.")
    if not content_type:
        raise IllegalArgumentException("content_type keyword must be provided.")

    # "body" is popped because it can't be passed as a keyword to ArtifactInfo.create()
    body = kw.pop("body", None)

    # the content hash is what identifies a duplicate
    digest = cls._content_md5(source_name, content_type, body)
    duplicate_query = ArtifactInfo.find_by_content_md5(digest, keys_only=True)
    duplicate_key = duplicate_query.get()
    if duplicate_key:
        raise DuplicateDataException("artifact %s" % (duplicate_key.name()))

    return cls._create(source_name, body, digest, **kw)
def count_infos(cls, source):
    """Returns the count reported by the keys-only ArtifactInfo query for source."""
    info_keys = ArtifactInfo.find_by_source(source, keys_only=True)
    return info_keys.count()
def find_newer(cls, timestamp, **kw):
    """Delegates to ArtifactInfo.find_newer, forwarding timestamp and any keywords."""
    newer = ArtifactInfo.find_newer(timestamp, **kw)
    return newer
def find_artifact_counts_newer(cls, datetime, **kw):
    """
    Tallies, per source_name, the artifacts that ArtifactInfo.find_newer
    returns for datetime, and returns the tally via KeyCounter.to_hash().
    """
    tally = KeyCounter()
    newer_infos = ArtifactInfo.find_newer(datetime, **kw)
    for info in newer_infos:
        tally.increment(info.source_name)
    return tally.to_hash()