def test_untagging_files(self):
    rf1 = S3Repo.add_file(self.random_filename(), s3_key = 'abc', date_published = now())
    rf2 = S3Repo.add_file(self.random_filename(), s3_key = 'def', date_published = now())
    rf1.tag_file('imported', 'processed')
    rf2.tag_file('imported', 'processed')
    S3Repo.commit()

    rf1.untag_file('imported')
    S3Repo.commit()

    self.assert_rf_tags(
        [ 's3_key',    'tag_name',  ],
        [ rf1.s3_key,  'processed', ],
        [ rf2.s3_key,  'imported',  ],
        [ rf2.s3_key,  'processed', ],
    )

def test_timelimit_for_deleting_unpublished_files(self):
    current_host = s3repo.host.RepoHost.current_host_id()
    rf1 = S3Repo.add_file(self.random_filename(), s3_key = "abc")
    rf2 = S3Repo.add_file(self.random_filename(), s3_key = "def")
    rf3 = S3Repo.add_file(self.random_filename(), s3_key = "ghi")

    dt = now() - seconds(self.config['fs.unpublished_stale_seconds']) - seconds(2)
    for i, rf in enumerate([ rf1, rf2, rf3 ]):
        rf.date_created = dt + seconds(i)
        rf.update()

    S3Repo.maintain_current_host()
    S3Repo.commit()

    self.assertSqlResults(self.conn(), """
        SELECT *
        FROM s3_repo.files
            LEFT OUTER JOIN s3_repo.downloads USING (file_id)
        ORDER BY s3_key
    """,
        [ 'file_id',    'date_created',    'host_id',    ],
        [ rf1.file_id,  rf1.date_created,  None,         ],
        [ rf2.file_id,  rf2.date_created,  None,         ],
        [ rf3.file_id,  rf3.date_created,  current_host, ],
    )

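# Illustrative sketch (an assumption, not the repo's actual implementation):
# the staleness rule the test above exercises. maintain_current_host()
# plausibly drops local copies of unpublished files once their age exceeds
# fs.unpublished_stale_seconds, so rf1 and rf2 (past the cutoff) lose their
# downloads rows while rf3 (at the cutoff) keeps its copy on this host.
def _example_is_stale(date_created, stale_seconds, current_time):
    """Hypothetical cutoff predicate consistent with the test above."""
    return (current_time - date_created).total_seconds() > stale_seconds
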
def test_save_backup(self):
    rf1 = S3Repo.add_file(s3_key = "abc")
    rf2 = S3Repo.add_file(s3_key = "bcd")
    rf3 = S3Repo.add_file(s3_key = "cde")
    rf4 = S3Repo.backup_db()
    S3Repo.commit()

    self.assertSqlResults(self.conn(), """
        SELECT *
        FROM s3_repo.files
        ORDER BY s3_key
    """,
        [ 'file_id',    's3_bucket',    's3_key',    'published',    'date_published',    'file_size',    ],
        [ rf1.file_id,  rf1.s3_bucket,  rf1.s3_key,  rf1.published,  rf1.date_published,  rf1.file_size,  ],
        [ rf2.file_id,  rf2.s3_bucket,  rf2.s3_key,  rf2.published,  rf2.date_published,  rf2.file_size,  ],
        [ rf3.file_id,  rf3.s3_bucket,  rf3.s3_key,  rf3.published,  rf3.date_published,  rf3.file_size,  ],
        [ rf4.file_id,  rf4.s3_bucket,  rf4.s3_key,  True,           now(),               rf4.file_size,  ],
    )

def test_month_tagging_files__creates_tags(self):
    rf = S3Repo.add_file(self.random_filename(), date_published = now())
    rf.tag_date(coerce_date('2013-04-24 01:02:03'), type='month')
    rf.commit()

    self.assert_tags(
        [ 'tag_name',         ],
        [ 'month=2013-04-01', ],
    )

def test_unpublished_files_are_only_removed_for_locally_created_content(self):
    rf1 = S3Repo.add_file(self.random_filename(), s3_key = "abc")
    rf2 = S3Repo.add_file(self.random_filename(), s3_key = "def")
    rf3 = S3Repo.add_file(self.random_filename(), s3_key = "ghi")
    for rf in [ rf1, rf2, rf3 ]:
        rf.date_created = now() - weeks(2)
        rf.update()

    # rf3 originated on another host, so local maintenance should leave it alone.
    rf3.origin = s3repo.host.RepoHost.find_or_create('abc').host_id
    rf3.update()
    S3Repo.commit()

    S3Repo.maintain_current_host()
    self.assertFalse(os.path.exists(rf1.local_path()))
    self.assertFalse(os.path.exists(rf2.local_path()))
    self.assertTrue(os.path.exists(rf3.local_path()))
    S3Repo.commit()

def test_hour_tagging_files_is_default(self):
    rf = S3Repo.add_file(self.random_filename(), date_published = now())
    rf.tag_date(coerce_date('2013-04-24 01:02:03'))  # Hour is the default
    rf.commit()

    self.assert_tags(
        [ 'tag_name',                 ],
        [ 'day=2013-04-24',           ],
        [ 'hour=2013-04-24 01:00:00', ],
        [ 'month=2013-04-01',         ],
        [ 'week=2013-04-22',          ],
    )

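# Illustrative sketch (an assumption, not the repo's implementation): one
# plausible derivation of the date-bucket tags asserted above. Month
# truncates to the first of the month, week to the preceding Monday, day to
# midnight, and hour to the top of the hour.
import datetime

def _example_date_tags(dt):
    """Hypothetical helper mirroring the tag values asserted in these tests."""
    week = dt - datetime.timedelta(days = dt.weekday())  # Monday of dt's week
    return [
        'day=%s'   % dt.strftime('%Y-%m-%d'),
        'hour=%s'  % dt.strftime('%Y-%m-%d %H:00:00'),
        'month=%s' % dt.strftime('%Y-%m-01'),
        'week=%s'  % week.strftime('%Y-%m-%d'),
    ]
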
def test_maintain_database_does_not_delete_published_files(self):
    current_host = s3repo.host.RepoHost.current_host_id()
    rf1 = S3Repo.add_file(self.random_filename(), s3_key = "abc")
    rf2 = S3Repo.add_file(self.random_filename(), s3_key = "def")
    rf3 = S3Repo.add_file(self.random_filename(), s3_key = "ghi")
    rf1.publish()
    rf1.unlink()
    rf2.unlink()
    S3Repo.commit()

    self.assertSqlResults(self.conn(), """
        SELECT *
        FROM s3_repo.files
            LEFT OUTER JOIN s3_repo.downloads USING (file_id)
        ORDER BY s3_key
    """,
        [ 'file_id',    's3_key',  'date_published',  'published',  'host_id',    ],
        [ rf1.file_id,  'abc',     now(),             True,         None,         ],  # Published but doesn't exist anywhere; should still exist
        [ rf2.file_id,  'def',     None,              False,        None,         ],  # Unpublished and exists nowhere; should be deleted
        [ rf3.file_id,  'ghi',     None,              False,        current_host, ],  # Unpublished but still exists somewhere; should still exist
    )

    S3Repo.maintain_database()
    S3Repo.commit()

    self.assertSqlResults(self.conn(), """
        SELECT *
        FROM s3_repo.files
            LEFT OUTER JOIN s3_repo.downloads USING (file_id)
        ORDER BY s3_key
    """,
        [ 'file_id',    's3_key',  'date_published',  'published',  'host_id',    ],
        [ rf1.file_id,  'abc',     now(),             True,         None,         ],  # Published but doesn't exist anywhere; should still exist
        [ rf3.file_id,  'ghi',     None,              False,        current_host, ],  # Unpublished but still exists somewhere; should still exist
    )

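# Illustrative sketch (an assumption, not the repo's actual logic): the
# retention rule verified above. maintain_database() plausibly purges a file
# row only when it is unpublished AND no host still holds a copy (i.e. it
# has no s3_repo.downloads row).
def _example_should_purge(published, holding_hosts):
    """Hypothetical purge predicate matching the expected results above."""
    return not published and not holding_hosts
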
def setup_default_tag_files(self, publish = True):
    rfs = [ S3Repo.add_file(self.random_filename()) for x in xrange(4) ]
    if publish:
        for rf in rfs:
            rf.publish()

    rfs[0].tag_file('imported', 'processed', 'archived')
    rfs[1].tag_file('imported', 'processed')
    rfs[2].tag_file('processed', 'restored', 'restricted')
    # rfs[3] is untagged
    S3Repo.commit()

    return rfs