Exemple #1
0
def cleanup_old_files(*, base_filter_id):
    """Delete old MLBF storage folders.

    A folder under settings.MLBF_STORAGE_PATH is deleted only when it is
    BOTH more than six months old AND not more recent than the current base
    filter.  Folder names are millisecond timestamps; non-numeric names are
    skipped.

    Args:
        base_filter_id: timestamp (ms) of the current base filter; folders
            newer than this are kept regardless of age.
    """
    log.info('Starting clean up of old MLBF folders...')
    six_months_ago = datetime_to_ts(datetime.now() - timedelta(weeks=26))
    base_filter_ts = int(base_filter_id)
    storage = SafeStorage(user_media='mlbf_storage')
    # Renamed from `dir` to avoid shadowing the `dir` builtin.
    for dir_name in storage.listdir(settings.MLBF_STORAGE_PATH)[0]:
        dir_name = force_str(dir_name)
        # skip non-numeric folder names
        if not dir_name.isdigit():
            log.info('Skipping %s because not a timestamp', dir_name)
            continue
        dir_ts = int(dir_name)
        dir_as_date = datetime.fromtimestamp(dir_ts / 1000)
        # delete if >6 months old and <base_filter_id
        if dir_ts > six_months_ago:
            log.info('Skipping %s because < 6 months old (%s)', dir_name,
                     dir_as_date)
        elif dir_ts > base_filter_ts:
            log.info(
                'Skipping %s because more recent (%s) than base mlbf (%s)',
                dir_name,
                dir_as_date,
                datetime.fromtimestamp(base_filter_ts / 1000),
            )
        else:
            log.info('Deleting %s because > 6 months old (%s)', dir_name,
                     dir_as_date)
            storage.rm_stored_dir(os.path.join(settings.MLBF_STORAGE_PATH,
                                               dir_name))
Exemple #2
0
    def delete_picture(self):
        """Delete picture of this user."""
        # Recursive import
        from olympia.users.tasks import delete_photo

        storage = SafeStorage(user_media='userpics')

        # Queue async deletion of both the resized picture and the original,
        # skipping paths that don't exist on storage.
        for picture in (self.picture_path, self.picture_path_original):
            if storage.exists(picture):
                delete_photo.delay(picture)

        if self.picture_type:
            self.update(picture_type=None)
Exemple #3
0
    def update(self, instance, validated_data):
        """Run the regular update, then persist an uploaded picture if any.

        The raw upload is written to the instance's original picture path,
        its content type recorded, and an async resize task queued.
        """
        instance = super().update(instance, validated_data)

        photo = validated_data.get('picture_upload')
        if not photo:
            return instance

        original = instance.picture_path_original
        storage = SafeStorage(user_media='userpics')
        with storage.open(original, 'wb') as original_file:
            for chunk in photo.chunks():
                original_file.write(chunk)
        instance.update(picture_type=photo.content_type)
        resize_photo.delay(
            original,
            instance.picture_path,
            set_modified_on=instance.serializable_reference(),
        )
        return instance
Exemple #4
0
def delete_photo(dst, **kw):
    """Best-effort deletion of a userpic; refuses paths outside the
    userpics media directory and logs (rather than raises) on failure."""
    task_log.info('[1@None] Deleting photo: %s.' % dst)

    allowed_prefix = user_media_path('userpics')
    if not dst.startswith(allowed_prefix):
        task_log.error("Someone tried deleting something they shouldn't: %s" %
                       dst)
        return

    storage = SafeStorage(user_media='userpics')
    try:
        storage.delete(dst)
    except Exception as e:
        task_log.error('Error deleting userpic: %s' % e)
Exemple #5
0
def upload_filter(generation_time, is_base=True):
    """Publish an MLBF to the Remote Settings collection.

    For a base filter (is_base=True) the collection is wiped first and the
    filter binary is uploaded as an attachment; otherwise only a JSON stash
    record (the diff since the previous filter) is published.  Afterwards the
    session is completed and the config keys tracking generation times are
    updated.

    Args:
        generation_time: timestamp id of the MLBF folder to load from storage.
        is_base: whether this filter replaces the base filter.
    """
    bucket = settings.REMOTE_SETTINGS_WRITER_BUCKET
    server = RemoteSettings(bucket,
                            REMOTE_SETTINGS_COLLECTION_MLBF,
                            sign_off_needed=False)
    mlbf = MLBF.load_from_storage(generation_time)
    if is_base:
        # clear the collection for the base - we want to be the only filter
        server.delete_all_records()
        statsd.incr('blocklist.tasks.upload_filter.reset_collection')
        # Then the bloomfilter
        data = {
            'key_format': MLBF.KEY_FORMAT,
            'generation_time': generation_time,
            'attachment_type': BLOCKLIST_RECORD_MLBF_BASE,
        }
        storage = SafeStorage(user_media='mlbf_storage')
        with storage.open(mlbf.filter_path, 'rb') as filter_file:
            attachment = ('filter.bin', filter_file,
                          'application/octet-stream')
            server.publish_attachment(data, attachment)
            statsd.incr('blocklist.tasks.upload_filter.upload_mlbf')
        statsd.incr('blocklist.tasks.upload_filter.upload_mlbf.base')
    else:
        # If we have a stash, write that
        stash_data = {
            'key_format': MLBF.KEY_FORMAT,
            'stash_time': generation_time,
            'stash': mlbf.stash_json,
        }
        server.publish_record(stash_data)
        statsd.incr('blocklist.tasks.upload_filter.upload_stash')

    server.complete_session()
    set_config(MLBF_TIME_CONFIG_KEY, generation_time, json_value=True)
    if is_base:
        # Base uploads also advance the base-filter id marker.
        set_config(MLBF_BASE_ID_CONFIG_KEY, generation_time, json_value=True)
Exemple #6
0
# We need ES index aliases to match prod behaviour, and the names need to
# stay consistent during the whole test run, so we generate them at import
# time. Note that this works because pytest overrides ES_INDEXES before the
# test run even begins - if we were using override_settings() on ES_INDEXES
# we'd be in trouble.
ES_INDEX_SUFFIXES = {
    key: timestamp_index('')
    for key in settings.ES_INDEXES.keys()
}

# django2.2 encodes with the decimal code; django3.2 with the hex code.
SQUOTE_ESCAPED = escape("'")

# A Storage instance for the filesystem root to be used during tests that read fixtures
# and/or try to copy them under settings.STORAGE_ROOT.
root_storage = SafeStorage(location='/')


def get_es_index_name(key):
    """Return the name of the actual index used in tests for a given key
    taken from settings.ES_INDEXES.

    Can be used to check whether aliases have been set properly -
    ES_INDEXES will give the aliases, and this method will give the indices
    the aliases point to."""
    alias = settings.ES_INDEXES[key]
    suffix = ES_INDEX_SUFFIXES[key]
    return f'{alias}{suffix}'


def setup_es_test_data(es):
    try:
Exemple #7
0
 def __init__(self, id_):
     """Store the filter id as a string and set up MLBF storage access."""
     # simplify later code by assuming always a string
     self.id = str(id_)
     self.storage = SafeStorage(user_media='mlbf_storage')
Exemple #8
0
class MLBF:
    """Reads and writes the artefacts that make up a Multi-Level Bloom
    Filter blocklist under settings.MLBF_STORAGE_PATH/<id>/ (blocked.json,
    notblocked.json, filter, stash.json).

    This base class is abstract: `blocked_items` / `not_blocked_items` are
    provided by the subclasses returned from `load_from_storage` and
    `generate_from_db`.
    """

    # Template for a single filter entry: "<guid>:<version>".
    KEY_FORMAT = '{guid}:{version}'

    def __init__(self, id_):
        # simplify later code by assuming always a string
        self.id = str(id_)
        self.storage = SafeStorage(user_media='mlbf_storage')

    @classmethod
    def hash_filter_inputs(cls, input_list):
        """Return a set of KEY_FORMAT strings for (guid, version) pairs."""
        return {
            cls.KEY_FORMAT.format(guid=guid, version=version)
            for (guid, version) in input_list
        }

    @property
    def _blocked_path(self):
        # JSON file holding the blocked items for this filter id.
        return os.path.join(settings.MLBF_STORAGE_PATH, self.id,
                            'blocked.json')

    @cached_property
    def blocked_items(self):
        # Implemented by subclasses (read from storage or the database).
        raise NotImplementedError

    def write_blocked_items(self):
        """Dump blocked_items as JSON to _blocked_path."""
        blocked_path = self._blocked_path
        with self.storage.open(blocked_path, 'w') as json_file:
            log.info(f'Writing to file {blocked_path}')
            json.dump(self.blocked_items, json_file)

    @property
    def _not_blocked_path(self):
        # JSON file holding the not-blocked items for this filter id.
        return os.path.join(settings.MLBF_STORAGE_PATH, self.id,
                            'notblocked.json')

    @cached_property
    def not_blocked_items(self):
        # Implemented by subclasses (read from storage or the database).
        raise NotImplementedError

    def write_not_blocked_items(self):
        """Dump not_blocked_items as JSON to _not_blocked_path."""
        not_blocked_path = self._not_blocked_path
        with self.storage.open(not_blocked_path, 'w') as json_file:
            log.info(f'Writing to file {not_blocked_path}')
            json.dump(self.not_blocked_items, json_file)

    @property
    def filter_path(self):
        # Binary bloomfilter file for this filter id.
        return os.path.join(settings.MLBF_STORAGE_PATH, self.id, 'filter')

    @property
    def _stash_path(self):
        # JSON stash (diff vs. the previous filter) for this filter id.
        return os.path.join(settings.MLBF_STORAGE_PATH, self.id, 'stash.json')

    @cached_property
    def stash_json(self):
        """Load and cache the stash JSON from storage."""
        with self.storage.open(self._stash_path, 'r') as json_file:
            return json.load(json_file)

    def generate_and_write_filter(self):
        """Write blocked/not-blocked JSON, then build the bloomfilter and
        write it to filter_path, logging generation stats as JSON."""
        stats = {}

        self.write_blocked_items()
        self.write_not_blocked_items()

        bloomfilter = generate_mlbf(stats=stats,
                                    blocked=self.blocked_items,
                                    not_blocked=self.not_blocked_items)

        # write bloomfilter
        mlbf_path = self.filter_path
        with self.storage.open(mlbf_path, 'wb') as filter_file:
            log.info(f'Writing to file {mlbf_path}')
            bloomfilter.tofile(filter_file)
            stats['mlbf_filesize'] = os.stat(mlbf_path).st_size

        log.info(json.dumps(stats))

    @classmethod
    def generate_diffs(cls, previous, current):
        """Return (extras, deletes): items added to / removed from
        `previous` to obtain `current`, as sets."""
        previous = set(previous)
        current = set(current)
        extras = current - previous
        deletes = previous - current
        return extras, deletes

    def generate_and_write_stash(self, previous_mlbf):
        """Write blocked/not-blocked JSON plus a stash JSON describing the
        block diff between previous_mlbf and this filter."""
        self.write_blocked_items()
        self.write_not_blocked_items()

        # compare previous with current blocks
        extras, deletes = self.generate_diffs(previous_mlbf.blocked_items,
                                              self.blocked_items)
        # Assigning overrides the stash_json cached_property on this
        # instance, so later reads use this dict instead of re-reading the
        # file.
        self.stash_json = {
            'blocked': list(extras),
            'unblocked': list(deletes),
        }
        # write stash
        stash_path = self._stash_path
        with self.storage.open(stash_path, 'w') as json_file:
            log.info(f'Writing to file {stash_path}')
            json.dump(self.stash_json, json_file)

    def should_reset_base_filter(self, previous_bloom_filter):
        """True when the block diff vs. the given base filter exceeds
        BASE_REPLACE_THRESHOLD (or the base's data is missing)."""
        try:
            # compare base with current blocks
            extras, deletes = self.generate_diffs(
                previous_bloom_filter.blocked_items, self.blocked_items)
            return (len(extras) + len(deletes)) > BASE_REPLACE_THRESHOLD
        except FileNotFoundError:
            # when previous_base_mlfb._blocked_path doesn't exist
            return True

    def blocks_changed_since_previous(self, previous_bloom_filter):
        """Return the number of changed blocks vs. the given filter; when
        its data is missing, all current blocks count as changed."""
        try:
            # compare base with current blocks
            extras, deletes = self.generate_diffs(
                previous_bloom_filter.blocked_items, self.blocked_items)
            return len(extras) + len(deletes)
        except FileNotFoundError:
            # when previous_bloom_filter._blocked_path doesn't exist
            return len(self.blocked_items)

    @classmethod
    def load_from_storage(cls, *args, **kwargs):
        # Alternate constructor: MLBF backed by files already on storage.
        return StoredMLBF(*args, **kwargs)

    @classmethod
    def generate_from_db(cls, *args, **kwargs):
        # Alternate constructor: MLBF computed from current database state.
        return DatabaseMLBF(*args, **kwargs)
Exemple #9
0
def test_rm_stored_dir():
    """rm_stored_dir removes a directory tree but not its siblings."""
    tmp = tempfile.mkdtemp(dir=settings.TMP_PATH)
    jn = partial(os.path.join, tmp)
    storage = SafeStorage(user_media='tmp')
    try:
        fixtures = (
            ('file1.txt', '<stuff>'),
            ('one/file1.txt', ''),
            ('one/two/file1.txt', 'moar stuff'),
            ('one/kristi\u0107/kristi\u0107.txt', ''),
        )
        for name, content in fixtures:
            storage.save(jn(name), ContentFile(content))

        storage.rm_stored_dir(jn('one'))

        # Everything under (and including) "one" is gone...
        gone = (
            'one',
            'one/file1.txt',
            'one/two',
            'one/two/file1.txt',
            'one/kristi\u0107/kristi\u0107.txt',
        )
        for name in gone:
            assert not storage.exists(jn(name))
        # ...but the sibling file at the top level survives.
        assert storage.exists(jn('file1.txt'))
    finally:
        rm_local_tmp_dir(tmp)
Exemple #10
0
def source_upload_storage():
    """Return a SafeStorage instance with an empty user_media subfolder."""
    storage = SafeStorage(user_media='')
    return storage
 def setUp(self):
     """Create a scratch dir under TMP_PATH and a default SafeStorage."""
     super().setUp()
     self.tmp = tempfile.mkdtemp(dir=settings.TMP_PATH)
     self.stor = SafeStorage()
class TestSafeStorage(TestCase):
    """Exercise SafeStorage primitives: open/read/write (including
    non-ascii names and contents), implicit directory creation, delete."""

    def setUp(self):
        super().setUp()
        # Scratch directory under TMP_PATH, removed in tearDown.
        self.tmp = tempfile.mkdtemp(dir=settings.TMP_PATH)
        self.stor = SafeStorage()

    def tearDown(self):
        rm_local_tmp_dir(self.tmp)
        super().tearDown()

    def test_read_write(self):
        """Round-trip a small text file."""
        fn = os.path.join(self.tmp, 'somefile.txt')
        with self.stor.open(fn, 'w') as fd:
            fd.write('stuff')
        with self.stor.open(fn, 'r') as fd:
            assert fd.read() == 'stuff'

    def test_non_ascii_filename(self):
        """Filenames containing non-ascii characters round-trip."""
        fn = os.path.join(self.tmp, 'Ivan Krsti\u0107.txt')
        with self.stor.open(fn, 'w') as fd:
            fd.write('stuff')
        with self.stor.open(fn, 'r') as fd:
            assert fd.read() == 'stuff'

    def test_non_ascii_content(self):
        """Non-ascii bytes written in binary mode round-trip as utf-8."""
        fn = os.path.join(self.tmp, 'somefile.txt')
        with self.stor.open(fn, 'wb') as fd:
            fd.write('Ivan Krsti\u0107.txt'.encode())
        with self.stor.open(fn, 'rb') as fd:
            assert fd.read().decode('utf8') == 'Ivan Krsti\u0107.txt'

    def test_make_file_dirs(self):
        """Opening a file for writing creates missing parent dirs."""
        dp = os.path.join(self.tmp, 'path', 'to')
        self.stor.open(os.path.join(dp, 'file.txt'), 'w').close()
        assert os.path.exists(
            self.stor.path(dp)), 'Directory not created: %r' % dp

    def test_do_not_make_file_dirs_when_reading(self):
        """Opening for read must not attempt to create directories."""
        fpath = os.path.join(self.tmp, 'file.txt')
        with open(fpath, 'w') as fp:
            fp.write('content')
        # Make sure this doesn't raise an exception.
        self.stor.open(fpath, 'r').close()

    def test_make_dirs_only_once(self):
        """Writing twice into the same new dir must not fail on mkdir."""
        dp = os.path.join(self.tmp, 'path', 'to')
        with self.stor.open(os.path.join(dp, 'file.txt'), 'w') as fd:
            fd.write('stuff')
        # Make sure it doesn't try to make the dir twice
        with self.stor.open(os.path.join(dp, 'file.txt'), 'w') as fd:
            fd.write('stuff')
        with self.stor.open(os.path.join(dp, 'file.txt'), 'r') as fd:
            assert fd.read() == 'stuff'

    def test_delete_empty_dir(self):
        """delete() removes an empty directory."""
        dp = os.path.join(self.tmp, 'path')
        os.mkdir(dp)
        self.stor.delete(dp)
        assert not os.path.exists(dp)

    def test_cannot_delete_non_empty_dir(self):
        """delete() raises OSError on a non-empty directory."""
        dp = os.path.join(self.tmp, 'path')
        with self.stor.open(os.path.join(dp, 'file.txt'), 'w') as fp:
            fp.write('stuff')
        self.assertRaises(OSError, self.stor.delete, dp)

    def test_delete_file(self):
        """delete() removes a file but keeps its parent directory."""
        dp = os.path.join(self.tmp, 'path')
        fn = os.path.join(dp, 'file.txt')
        with self.stor.open(fn, 'w') as fp:
            fp.write('stuff')
        self.stor.delete(fn)
        assert not os.path.exists(fn)
        assert os.path.exists(dp)
Exemple #13
0
def test_storage_walk():
    """storage.walk yields (dir, subdirs, files) for the whole tree."""
    tmp = force_str(tempfile.mkdtemp(dir=settings.TMP_PATH))
    jn = partial(os.path.join, tmp)
    storage = SafeStorage(user_media='tmp')
    try:
        fixtures = (
            'file1.txt',
            'one/file1.txt',
            'one/file2.txt',
            'one/two/file1.txt',
            'one/three/file1.txt',
            'four/five/file1.txt',
            'four/kristi\u2603/kristi\u2603.txt',
        )
        for name in fixtures:
            storage.save(jn(name), ContentFile(''))

        results = [(path, set(subdirs), set(files))
                   for path, subdirs, files in sorted(storage.walk(tmp))]

        # Entries come back sorted by directory path.
        assert results == [
            (tmp, {'four', 'one'}, {'file1.txt'}),
            (jn('four'), {'five', 'kristi\u2603'}, set()),
            (jn('four/kristi\u2603'), set(), {'kristi\u2603.txt'}),
            (jn('four/five'), set(), {'file1.txt'}),
            (jn('one'), {'three', 'two'}, {'file1.txt', 'file2.txt'}),
            (jn('one/three'), set(), {'file1.txt'}),
            (jn('one/two'), set(), {'file1.txt'}),
        ][:2] + [
            (jn('four/five'), set(), {'file1.txt'}),
            (jn('four/kristi\u2603'), set(), {'kristi\u2603.txt'}),
            (jn('one'), {'three', 'two'}, {'file1.txt', 'file2.txt'}),
            (jn('one/three'), set(), {'file1.txt'}),
            (jn('one/two'), set(), {'file1.txt'}),
        ]
    finally:
        rm_local_tmp_dir(tmp)
Exemple #14
0
def hero_image_storage():
    """Return a SafeStorage instance with an empty user_media subfolder."""
    storage = SafeStorage(user_media='')
    return storage
Exemple #15
0
def render_to_svg(template, context, preview, thumbnail_dimensions,
                  theme_manifest):
    """Render a theme preview SVG to preview.image_path and write a
    PNG-derived thumbnail to preview.thumbnail_path.

    Two-stage render: first the background images alone (rasterized to PNG,
    re-encoded as JPEG and embedded via encode_header), then the full SVG
    with theme UI colors applied on top.

    Returns True on success; returns None when rasterization or header
    encoding fails (failure is logged, not raised).
    """
    tmp_args = {
        'dir': settings.TMP_PATH,
        'mode': 'wb',
        # Keep the temp files around in DEBUG for inspection.
        'delete': not settings.DEBUG,
        'suffix': '.png',
    }

    # first stage - just the images
    image_only_svg = template.render(context).encode('utf-8')

    with BytesIO() as background_blob:
        # write the image only background to a file and back to a blob
        with tempfile.NamedTemporaryFile(**tmp_args) as background_png:
            if not write_svg_to_png(image_only_svg, background_png.name):
                return
            # TODO: improvement - only re-encode jpg backgrounds as jpg?
            Image.open(background_png.name).convert('RGB').save(
                background_blob, 'JPEG', quality=80)

        # and encode the image in base64 to use in the context
        try:
            header_src, _, _ = encode_header(background_blob.getvalue(), 'jpg')
        except Exception as exc:
            log.info('Exception during svg preview generation %s', exc)
            return

    # then rebuild a context with it and render
    with_ui_context = {
        **dict(
            process_color_value(prop, color) for prop, color in theme_manifest.get(
                'colors', {}).items()),
        'amo':
        amo,
        'header_src':
        header_src,
        'svg_render_size':
        context['svg_render_size'],
        'header_src_height':
        context['svg_render_size'].height,
        'header_width':
        context['svg_render_size'].width,
    }
    finished_svg = template.render(with_ui_context).encode('utf-8')

    # and write that svg to preview.image_path
    storage = SafeStorage(user_media=VersionPreview.media_folder)
    with storage.open(preview.image_path, 'wb') as image_path:
        image_path.write(finished_svg)

    # then also write a fully rendered svg and resize for the thumbnails
    with tempfile.NamedTemporaryFile(**tmp_args) as complete_preview_as_png:
        if convert_svg_to_png(preview.image_path,
                              complete_preview_as_png.name):
            resize_image(
                complete_preview_as_png.name,
                preview.thumbnail_path,
                thumbnail_dimensions,
                format=preview.get_format('thumbnail'),
                quality=
                35,  # It's ignored for png format, so it's fine to always set.
            )
            return True
Exemple #16
0
 def setUp(self):
     """Create a scratch dir under TMP_PATH and a 'tmp' SafeStorage."""
     super().setUp()
     self.tmp = tempfile.mkdtemp(dir=settings.TMP_PATH)
     self.storage = SafeStorage(user_media='tmp')
Exemple #17
0
class TestFileOps(TestCase):
    """Exercise SafeStorage copy/move helpers (copy_stored_file /
    move_stored_file), including chunked transfers and non-ascii data."""

    def setUp(self):
        super().setUp()
        # Scratch directory under TMP_PATH, removed in tearDown.
        self.tmp = tempfile.mkdtemp(dir=settings.TMP_PATH)
        self.storage = SafeStorage(user_media='tmp')

    def tearDown(self):
        rm_local_tmp_dir(self.tmp)
        super().tearDown()

    def path(self, path):
        """Join `path` onto this test's scratch directory."""
        return os.path.join(self.tmp, path)

    def contents(self, path):
        """Return the raw bytes stored at `path`."""
        with self.storage.open(path, 'rb') as fp:
            return fp.read()

    def newfile(self, name, contents):
        """Create a file named `name` with `contents`; return its path."""
        src = self.path(name)
        self.storage.save(src, ContentFile(contents))
        return src

    def test_copy(self):
        """Copy into a not-yet-existing subdirectory."""
        src = self.newfile('src.txt', '<contents>')
        dest = self.path('somedir/dest.txt')
        self.storage.copy_stored_file(src, dest)
        assert self.contents(dest) == b'<contents>'

    def test_self_copy(self):
        """Copying a file onto itself keeps its contents intact."""
        src = self.newfile('src.txt', '<contents>')
        dest = self.path('src.txt')
        self.storage.copy_stored_file(src, dest)
        assert self.contents(dest) == b'<contents>'

    def test_move(self):
        """Move copies the contents and removes the source."""
        src = self.newfile('src.txt', '<contents>')
        dest = self.path('somedir/dest.txt')
        self.storage.move_stored_file(src, dest)
        assert self.contents(dest) == b'<contents>'
        assert not self.storage.exists(src)

    def test_non_ascii(self):
        """Non-ascii names and contents survive a copy."""
        src = self.newfile('kristi\u0107.txt', 'ivan kristi\u0107'.encode())
        dest = self.path('somedir/kristi\u0107.txt')
        self.storage.copy_stored_file(src, dest)
        assert self.contents(dest) == b'ivan kristi\xc4\x87'

    def test_copy_chunking(self):
        """Copy with a tiny chunk_size still transfers everything."""
        src = self.newfile('src.txt', '<contents>')
        dest = self.path('somedir/dest.txt')
        self.storage.copy_stored_file(src, dest, chunk_size=1)
        assert self.contents(dest) == b'<contents>'

    def test_move_chunking(self):
        """Move with a tiny chunk_size still transfers everything."""
        src = self.newfile('src.txt', '<contents>')
        dest = self.path('somedir/dest.txt')
        self.storage.move_stored_file(src, dest, chunk_size=1)
        assert self.contents(dest) == b'<contents>'
        assert not self.storage.exists(src)