def cleanup_old_files(*, base_filter_id):
    log.info('Starting clean up of old MLBF folders...')
    six_months_ago = datetime_to_ts(datetime.now() - timedelta(weeks=26))
    base_filter_ts = int(base_filter_id)
    storage = SafeStorage(user_media='mlbf_storage')
    for dir in storage.listdir(settings.MLBF_STORAGE_PATH)[0]:
        dir = force_str(dir)
        # skip non-numeric folder names
        if not dir.isdigit():
            log.info('Skipping %s because not a timestamp', dir)
            continue
        dir_ts = int(dir)
        dir_as_date = datetime.fromtimestamp(dir_ts / 1000)
        # delete if >6 months old and <base_filter_id
        if dir_ts > six_months_ago:
            log.info('Skipping %s because < 6 months old (%s)', dir, dir_as_date)
        elif dir_ts > base_filter_ts:
            log.info(
                'Skipping %s because more recent (%s) than base mlbf (%s)',
                dir,
                dir_as_date,
                datetime.fromtimestamp(base_filter_ts / 1000),
            )
        else:
            log.info('Deleting %s because > 6 months old (%s)', dir, dir_as_date)
            storage.rm_stored_dir(os.path.join(settings.MLBF_STORAGE_PATH, dir))
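# Illustrative only (not part of the codebase): the folder names compared in
# cleanup_old_files() are epoch timestamps in milliseconds, which is why the
# code divides by 1000 before calling datetime.fromtimestamp(). A
# datetime_to_ts() along these lines would match that convention, though the
# real helper may be implemented differently.
def datetime_to_ts_sketch(dt):
    # milliseconds since the epoch, truncated to an int
    return int(dt.timestamp() * 1000)


# Round trip: datetime.fromtimestamp(datetime_to_ts_sketch(dt) / 1000) == dt,
# up to millisecond truncation.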
def delete_picture(self):
    """Delete picture of this user."""
    # Recursive import
    from olympia.users.tasks import delete_photo

    storage = SafeStorage(user_media='userpics')
    if storage.exists(self.picture_path):
        delete_photo.delay(self.picture_path)
    if storage.exists(self.picture_path_original):
        delete_photo.delay(self.picture_path_original)
    if self.picture_type:
        self.update(picture_type=None)
def update(self, instance, validated_data):
    instance = super().update(instance, validated_data)
    photo = validated_data.get('picture_upload')
    if photo:
        original = instance.picture_path_original
        storage = SafeStorage(user_media='userpics')
        with storage.open(original, 'wb') as original_file:
            for chunk in photo.chunks():
                original_file.write(chunk)
        instance.update(picture_type=photo.content_type)
        resize_photo.delay(
            original,
            instance.picture_path,
            set_modified_on=instance.serializable_reference(),
        )
    return instance
def delete_photo(dst, **kw):
    task_log.info('[1@None] Deleting photo: %s.' % dst)
    if not dst.startswith(user_media_path('userpics')):
        task_log.error("Someone tried deleting something they shouldn't: %s" % dst)
        return
    try:
        SafeStorage(user_media='userpics').delete(dst)
    except Exception as e:
        task_log.error('Error deleting userpic: %s' % e)
def upload_filter(generation_time, is_base=True):
    bucket = settings.REMOTE_SETTINGS_WRITER_BUCKET
    server = RemoteSettings(
        bucket, REMOTE_SETTINGS_COLLECTION_MLBF, sign_off_needed=False
    )
    mlbf = MLBF.load_from_storage(generation_time)
    if is_base:
        # clear the collection for the base - we want to be the only filter
        server.delete_all_records()
        statsd.incr('blocklist.tasks.upload_filter.reset_collection')
        # Then the bloomfilter
        data = {
            'key_format': MLBF.KEY_FORMAT,
            'generation_time': generation_time,
            'attachment_type': BLOCKLIST_RECORD_MLBF_BASE,
        }
        storage = SafeStorage(user_media='mlbf_storage')
        with storage.open(mlbf.filter_path, 'rb') as filter_file:
            attachment = ('filter.bin', filter_file, 'application/octet-stream')
            server.publish_attachment(data, attachment)
            statsd.incr('blocklist.tasks.upload_filter.upload_mlbf')
        statsd.incr('blocklist.tasks.upload_filter.upload_mlbf.base')
    else:
        # If we have a stash, write that
        stash_data = {
            'key_format': MLBF.KEY_FORMAT,
            'stash_time': generation_time,
            'stash': mlbf.stash_json,
        }
        server.publish_record(stash_data)
        statsd.incr('blocklist.tasks.upload_filter.upload_stash')
    server.complete_session()
    set_config(MLBF_TIME_CONFIG_KEY, generation_time, json_value=True)
    if is_base:
        set_config(MLBF_BASE_ID_CONFIG_KEY, generation_time, json_value=True)
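# For illustration only: with mlbf.stash_json inlined, the stash record
# published above would look roughly like this (the guids, versions and
# timestamp are invented; the 'blocked'/'unblocked' keys match what
# MLBF.generate_and_write_stash() writes below).
example_stash_record = {
    'key_format': '{guid}:{version}',
    'stash_time': 1587990908999,
    'stash': {
        'blocked': ['{some-guid}:1.0'],
        'unblocked': ['{other-guid}:2.1'],
    },
}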
# We need ES indexes aliases to match prod behaviour, but we also need the
# names to stay consistent during the whole test run, so we generate them at
# import time. Note that this works because pytest overrides ES_INDEXES
# before the test run even begins - if we were using override_settings() on
# ES_INDEXES we'd be in trouble.
ES_INDEX_SUFFIXES = {key: timestamp_index('') for key in settings.ES_INDEXES.keys()}

# django2.2 encodes with the decimal code; django3.2 with the hex code.
SQUOTE_ESCAPED = escape("'")

# A Storage instance for the filesystem root to be used during tests that
# read fixtures and/or try to copy them under settings.STORAGE_ROOT.
root_storage = SafeStorage(location='/')


def get_es_index_name(key):
    """Return the name of the actual index used in tests for a given key
    taken from settings.ES_INDEXES.

    Can be used to check whether aliases have been set properly - ES_INDEXES
    will give the aliases, and this method will give the indices the aliases
    point to."""
    value = settings.ES_INDEXES[key]
    return f'{value}{ES_INDEX_SUFFIXES[key]}'


def setup_es_test_data(es):
    try:
def __init__(self, id_):
    # simplify later code by ensuring the id is always a string
    self.id = str(id_)
    self.storage = SafeStorage(user_media='mlbf_storage')
class MLBF:
    KEY_FORMAT = '{guid}:{version}'

    def __init__(self, id_):
        # simplify later code by ensuring the id is always a string
        self.id = str(id_)
        self.storage = SafeStorage(user_media='mlbf_storage')

    @classmethod
    def hash_filter_inputs(cls, input_list):
        """Returns a set of 'guid:version' keys."""
        return {
            cls.KEY_FORMAT.format(guid=guid, version=version)
            for (guid, version) in input_list
        }

    @property
    def _blocked_path(self):
        return os.path.join(settings.MLBF_STORAGE_PATH, self.id, 'blocked.json')

    @cached_property
    def blocked_items(self):
        raise NotImplementedError

    def write_blocked_items(self):
        blocked_path = self._blocked_path
        with self.storage.open(blocked_path, 'w') as json_file:
            log.info(f'Writing to file {blocked_path}')
            json.dump(self.blocked_items, json_file)

    @property
    def _not_blocked_path(self):
        return os.path.join(settings.MLBF_STORAGE_PATH, self.id, 'notblocked.json')

    @cached_property
    def not_blocked_items(self):
        raise NotImplementedError

    def write_not_blocked_items(self):
        not_blocked_path = self._not_blocked_path
        with self.storage.open(not_blocked_path, 'w') as json_file:
            log.info(f'Writing to file {not_blocked_path}')
            json.dump(self.not_blocked_items, json_file)

    @property
    def filter_path(self):
        return os.path.join(settings.MLBF_STORAGE_PATH, self.id, 'filter')

    @property
    def _stash_path(self):
        return os.path.join(settings.MLBF_STORAGE_PATH, self.id, 'stash.json')

    @cached_property
    def stash_json(self):
        with self.storage.open(self._stash_path, 'r') as json_file:
            return json.load(json_file)

    def generate_and_write_filter(self):
        stats = {}

        self.write_blocked_items()
        self.write_not_blocked_items()

        bloomfilter = generate_mlbf(
            stats=stats,
            blocked=self.blocked_items,
            not_blocked=self.not_blocked_items,
        )

        # write bloomfilter
        mlbf_path = self.filter_path
        with self.storage.open(mlbf_path, 'wb') as filter_file:
            log.info(f'Writing to file {mlbf_path}')
            bloomfilter.tofile(filter_file)
        stats['mlbf_filesize'] = os.stat(mlbf_path).st_size

        log.info(json.dumps(stats))

    @classmethod
    def generate_diffs(cls, previous, current):
        previous = set(previous)
        current = set(current)
        extras = current - previous
        deletes = previous - current
        return extras, deletes

    def generate_and_write_stash(self, previous_mlbf):
        self.write_blocked_items()
        self.write_not_blocked_items()

        # compare previous with current blocks
        extras, deletes = self.generate_diffs(
            previous_mlbf.blocked_items, self.blocked_items
        )
        self.stash_json = {
            'blocked': list(extras),
            'unblocked': list(deletes),
        }
        # write stash
        stash_path = self._stash_path
        with self.storage.open(stash_path, 'w') as json_file:
            log.info(f'Writing to file {stash_path}')
            json.dump(self.stash_json, json_file)

    def should_reset_base_filter(self, previous_bloom_filter):
        try:
            # compare base with current blocks
            extras, deletes = self.generate_diffs(
                previous_bloom_filter.blocked_items, self.blocked_items
            )
            return (len(extras) + len(deletes)) > BASE_REPLACE_THRESHOLD
        except FileNotFoundError:
            # when previous_bloom_filter._blocked_path doesn't exist
            return True

    def blocks_changed_since_previous(self, previous_bloom_filter):
        try:
            # compare previous with current blocks
            extras, deletes = self.generate_diffs(
                previous_bloom_filter.blocked_items, self.blocked_items
            )
            return len(extras) + len(deletes)
        except FileNotFoundError:
            # when previous_bloom_filter._blocked_path doesn't exist
            return len(self.blocked_items)

    @classmethod
    def load_from_storage(cls, *args, **kwargs):
        return StoredMLBF(*args, **kwargs)

    @classmethod
    def generate_from_db(cls, *args, **kwargs):
        return DatabaseMLBF(*args, **kwargs)
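# A minimal orchestration sketch (an assumption, not the actual cron code)
# showing how MLBF and upload_filter() might fit together. get_config is
# assumed to be the read counterpart of the set_config calls in
# upload_filter() above.
def example_publish_blocklist(generation_time):
    mlbf = MLBF.generate_from_db(generation_time)
    base_filter = MLBF.load_from_storage(get_config(MLBF_BASE_ID_CONFIG_KEY))
    previous = MLBF.load_from_storage(get_config(MLBF_TIME_CONFIG_KEY))

    if mlbf.should_reset_base_filter(base_filter):
        # too many diffs accumulated against the base: publish a fresh full
        # bloomfilter, which also resets the remote collection
        mlbf.generate_and_write_filter()
        upload_filter(generation_time, is_base=True)
    elif mlbf.blocks_changed_since_previous(previous):
        # a small delta: publish an incremental stash instead
        mlbf.generate_and_write_stash(previous)
        upload_filter(generation_time, is_base=False)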
def test_rm_stored_dir():
    tmp = tempfile.mkdtemp(dir=settings.TMP_PATH)
    jn = partial(os.path.join, tmp)
    storage = SafeStorage(user_media='tmp')
    try:
        storage.save(jn('file1.txt'), ContentFile('<stuff>'))
        storage.save(jn('one/file1.txt'), ContentFile(''))
        storage.save(jn('one/two/file1.txt'), ContentFile('moar stuff'))
        storage.save(jn('one/kristi\u0107/kristi\u0107.txt'), ContentFile(''))

        storage.rm_stored_dir(jn('one'))

        assert not storage.exists(jn('one'))
        assert not storage.exists(jn('one/file1.txt'))
        assert not storage.exists(jn('one/two'))
        assert not storage.exists(jn('one/two/file1.txt'))
        assert not storage.exists(jn('one/kristi\u0107/kristi\u0107.txt'))
        assert storage.exists(jn('file1.txt'))
    finally:
        rm_local_tmp_dir(tmp)
def source_upload_storage():
    return SafeStorage(user_media='')
def setUp(self):
    super().setUp()
    self.tmp = tempfile.mkdtemp(dir=settings.TMP_PATH)
    self.stor = SafeStorage()
class TestSafeStorage(TestCase):
    def setUp(self):
        super().setUp()
        self.tmp = tempfile.mkdtemp(dir=settings.TMP_PATH)
        self.stor = SafeStorage()

    def tearDown(self):
        rm_local_tmp_dir(self.tmp)
        super().tearDown()

    def test_read_write(self):
        fn = os.path.join(self.tmp, 'somefile.txt')
        with self.stor.open(fn, 'w') as fd:
            fd.write('stuff')
        with self.stor.open(fn, 'r') as fd:
            assert fd.read() == 'stuff'

    def test_non_ascii_filename(self):
        fn = os.path.join(self.tmp, 'Ivan Krsti\u0107.txt')
        with self.stor.open(fn, 'w') as fd:
            fd.write('stuff')
        with self.stor.open(fn, 'r') as fd:
            assert fd.read() == 'stuff'

    def test_non_ascii_content(self):
        fn = os.path.join(self.tmp, 'somefile.txt')
        with self.stor.open(fn, 'wb') as fd:
            fd.write('Ivan Krsti\u0107.txt'.encode())
        with self.stor.open(fn, 'rb') as fd:
            assert fd.read().decode('utf8') == 'Ivan Krsti\u0107.txt'

    def test_make_file_dirs(self):
        dp = os.path.join(self.tmp, 'path', 'to')
        self.stor.open(os.path.join(dp, 'file.txt'), 'w').close()
        assert os.path.exists(self.stor.path(dp)), 'Directory not created: %r' % dp

    def test_do_not_make_file_dirs_when_reading(self):
        fpath = os.path.join(self.tmp, 'file.txt')
        with open(fpath, 'w') as fp:
            fp.write('content')
        # Make sure this doesn't raise an exception.
        self.stor.open(fpath, 'r').close()

    def test_make_dirs_only_once(self):
        dp = os.path.join(self.tmp, 'path', 'to')
        with self.stor.open(os.path.join(dp, 'file.txt'), 'w') as fd:
            fd.write('stuff')
        # Make sure it doesn't try to make the dir twice
        with self.stor.open(os.path.join(dp, 'file.txt'), 'w') as fd:
            fd.write('stuff')
        with self.stor.open(os.path.join(dp, 'file.txt'), 'r') as fd:
            assert fd.read() == 'stuff'

    def test_delete_empty_dir(self):
        dp = os.path.join(self.tmp, 'path')
        os.mkdir(dp)
        self.stor.delete(dp)
        assert not os.path.exists(dp)

    def test_cannot_delete_non_empty_dir(self):
        dp = os.path.join(self.tmp, 'path')
        with self.stor.open(os.path.join(dp, 'file.txt'), 'w') as fp:
            fp.write('stuff')
        self.assertRaises(OSError, self.stor.delete, dp)

    def test_delete_file(self):
        dp = os.path.join(self.tmp, 'path')
        fn = os.path.join(dp, 'file.txt')
        with self.stor.open(fn, 'w') as fp:
            fp.write('stuff')
        self.stor.delete(fn)
        assert not os.path.exists(fn)
        assert os.path.exists(dp)
def test_storage_walk():
    tmp = force_str(tempfile.mkdtemp(dir=settings.TMP_PATH))
    jn = partial(os.path.join, tmp)
    storage = SafeStorage(user_media='tmp')
    try:
        storage.save(jn('file1.txt'), ContentFile(''))
        storage.save(jn('one/file1.txt'), ContentFile(''))
        storage.save(jn('one/file2.txt'), ContentFile(''))
        storage.save(jn('one/two/file1.txt'), ContentFile(''))
        storage.save(jn('one/three/file1.txt'), ContentFile(''))
        storage.save(jn('four/five/file1.txt'), ContentFile(''))
        storage.save(jn('four/kristi\u2603/kristi\u2603.txt'), ContentFile(''))

        results = [
            (dir, set(subdirs), set(files))
            for dir, subdirs, files in sorted(storage.walk(tmp))
        ]

        assert results.pop(0) == (tmp, {'four', 'one'}, {'file1.txt'})
        assert results.pop(0) == (jn('four'), {'five', 'kristi\u2603'}, set())
        assert results.pop(0) == (jn('four/five'), set(), {'file1.txt'})
        assert results.pop(0) == (
            jn('four/kristi\u2603'),
            set(),
            {'kristi\u2603.txt'},
        )
        assert results.pop(0) == (
            jn('one'),
            {'three', 'two'},
            {'file1.txt', 'file2.txt'},
        )
        assert results.pop(0) == (jn('one/three'), set(), {'file1.txt'})
        assert results.pop(0) == (jn('one/two'), set(), {'file1.txt'})
        assert len(results) == 0
    finally:
        rm_local_tmp_dir(tmp)
def hero_image_storage():
    return SafeStorage(user_media='')
def render_to_svg(template, context, preview, thumbnail_dimensions, theme_manifest):
    tmp_args = {
        'dir': settings.TMP_PATH,
        'mode': 'wb',
        'delete': not settings.DEBUG,
        'suffix': '.png',
    }

    # first stage - just the images
    image_only_svg = template.render(context).encode('utf-8')
    with BytesIO() as background_blob:
        # write the image only background to a file and back to a blob
        with tempfile.NamedTemporaryFile(**tmp_args) as background_png:
            if not write_svg_to_png(image_only_svg, background_png.name):
                return
            # TODO: improvement - only re-encode jpg backgrounds as jpg?
            Image.open(background_png.name).convert('RGB').save(
                background_blob, 'JPEG', quality=80
            )
        # and encode the image in base64 to use in the context
        try:
            header_src, _, _ = encode_header(background_blob.getvalue(), 'jpg')
        except Exception as exc:
            log.info('Exception during svg preview generation %s', exc)
            return

    # then rebuild a context with it and render
    with_ui_context = {
        **dict(
            process_color_value(prop, color)
            for prop, color in theme_manifest.get('colors', {}).items()
        ),
        'amo': amo,
        'header_src': header_src,
        'svg_render_size': context['svg_render_size'],
        'header_src_height': context['svg_render_size'].height,
        'header_width': context['svg_render_size'].width,
    }
    finished_svg = template.render(with_ui_context).encode('utf-8')

    # and write that svg to preview.image_path
    storage = SafeStorage(user_media=VersionPreview.media_folder)
    with storage.open(preview.image_path, 'wb') as image_path:
        image_path.write(finished_svg)

    # then also write a fully rendered svg and resize for the thumbnails
    with tempfile.NamedTemporaryFile(**tmp_args) as complete_preview_as_png:
        if convert_svg_to_png(preview.image_path, complete_preview_as_png.name):
            resize_image(
                complete_preview_as_png.name,
                preview.thumbnail_path,
                thumbnail_dimensions,
                format=preview.get_format('thumbnail'),
                quality=35,  # It's ignored for png format, so it's fine to always set.
            )
            return True
def setUp(self):
    super().setUp()
    self.tmp = tempfile.mkdtemp(dir=settings.TMP_PATH)
    self.storage = SafeStorage(user_media='tmp')
class TestFileOps(TestCase):
    def setUp(self):
        super().setUp()
        self.tmp = tempfile.mkdtemp(dir=settings.TMP_PATH)
        self.storage = SafeStorage(user_media='tmp')

    def tearDown(self):
        rm_local_tmp_dir(self.tmp)
        super().tearDown()

    def path(self, path):
        return os.path.join(self.tmp, path)

    def contents(self, path):
        with self.storage.open(path, 'rb') as fp:
            return fp.read()

    def newfile(self, name, contents):
        src = self.path(name)
        self.storage.save(src, ContentFile(contents))
        return src

    def test_copy(self):
        src = self.newfile('src.txt', '<contents>')
        dest = self.path('somedir/dest.txt')
        self.storage.copy_stored_file(src, dest)
        assert self.contents(dest) == b'<contents>'

    def test_self_copy(self):
        src = self.newfile('src.txt', '<contents>')
        dest = self.path('src.txt')
        self.storage.copy_stored_file(src, dest)
        assert self.contents(dest) == b'<contents>'

    def test_move(self):
        src = self.newfile('src.txt', '<contents>')
        dest = self.path('somedir/dest.txt')
        self.storage.move_stored_file(src, dest)
        assert self.contents(dest) == b'<contents>'
        assert not self.storage.exists(src)

    def test_non_ascii(self):
        src = self.newfile('kristi\u0107.txt', 'ivan kristi\u0107'.encode())
        dest = self.path('somedir/kristi\u0107.txt')
        self.storage.copy_stored_file(src, dest)
        assert self.contents(dest) == b'ivan kristi\xc4\x87'

    def test_copy_chunking(self):
        src = self.newfile('src.txt', '<contents>')
        dest = self.path('somedir/dest.txt')
        self.storage.copy_stored_file(src, dest, chunk_size=1)
        assert self.contents(dest) == b'<contents>'

    def test_move_chunking(self):
        src = self.newfile('src.txt', '<contents>')
        dest = self.path('somedir/dest.txt')
        self.storage.move_stored_file(src, dest, chunk_size=1)
        assert self.contents(dest) == b'<contents>'
        assert not self.storage.exists(src)