# Example 1
def test_random_access():
    """Test basic random access via ``time_range`` selection.

    Verifies that a time-range query (a) fails with DataNotAvailable when
    the data has not been made yet, and (b) after making the data and
    deleting one chunk, still succeeds as long as the requested range
    only touches the surviving chunks.

    TODO: test random access when time info is not provided directly
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # Hack to enable testing if only required chunks are loaded:
        # with rechunking off, each chunk lands in its own file, so we can
        # delete exactly one and prove the others are loadable on their own.
        prev_rechunk = Peaks.rechunk_on_save
        Peaks.rechunk_on_save = False
        try:
            st = strax.Context(storage=strax.DataDirectory(temp_dir),
                               register=[Records, Peaks])

            with pytest.raises(strax.DataNotAvailable):
                # Time range selection requires data already available
                st.get_df(run_id, 'peaks', time_range=(3, 5))

            st.make(run_id=run_id, targets='peaks')

            # Second part of hack: corrupt data by removing one chunk
            dirname = str(st._key_for(run_id, 'peaks'))
            os.remove(
                os.path.join(temp_dir, dirname,
                             strax.dirname_to_prefix(dirname) + '-000000'))

            # A full load needs the deleted chunk -> hard failure
            with pytest.raises(FileNotFoundError):
                st.get_array(run_id, 'peaks')

            # A time-range load only touches the surviving chunks
            df = st.get_array(run_id, 'peaks', time_range=(3, 5))
            assert len(df) == 2 * recs_per_chunk
            assert df['time'].min() == 3
            assert df['time'].max() == 4
        finally:
            # Restore the class-level flag so this hack does not leak
            # into other tests that rely on the default rechunking.
            Peaks.rechunk_on_save = prev_rechunk
# Example 2
def test_filestore():
    """Round-trip test of the DataDirectory and ZipDirectory storage frontends.

    Makes 'peaks' into a fresh directory, checks availability bookkeeping,
    on-disk layout, metadata contents, cache reuse, then zips the directory
    and checks the metadata survives the ZipDirectory round trip.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        ctx = strax.Context(storage=strax.DataDirectory(temp_dir),
                            register=[Records, Peaks])

        # Nothing has been produced yet
        assert not ctx.is_stored(run_id, 'peaks')
        ctx.scan_runs()
        assert ctx.list_available('peaks') == []

        ctx.make(run_id=run_id, targets='peaks')

        # The run is now registered as available
        assert ctx.is_stored(run_id, 'peaks')
        ctx.scan_runs()
        assert ctx.list_available('peaks') == [run_id]
        assert ctx.scan_runs()['name'].values.tolist() == [run_id]

        # We should have two directories
        data_dirs = sorted(glob.glob(osp.join(temp_dir, '*/')))
        assert len(data_dirs) == 2

        # The first dir contains peaks.
        # It should have one data chunk (rechunk is on) and a metadata file
        peaks_dir = data_dirs[0]
        prefix = strax.dirname_to_prefix(peaks_dir)
        expected_files = [f'{prefix}-000000', f'{prefix}-metadata.json']
        assert sorted(os.listdir(peaks_dir)) == expected_files

        # Check metadata got written correctly.
        metadata = ctx.get_meta(run_id, 'peaks')
        assert len(metadata)
        assert 'writing_ended' in metadata
        assert 'exception' not in metadata
        assert len(metadata['chunks']) == 1

        # Check data gets loaded from cache, not rebuilt: the metadata
        # file's mtime must not change across a second load.
        md_filename = osp.join(peaks_dir, f'{prefix}-metadata.json')
        mtime_before = osp.getmtime(md_filename)
        loaded = ctx.get_array(run_id=run_id, targets='peaks')
        assert len(loaded) == recs_per_chunk * n_chunks
        assert osp.getmtime(md_filename) == mtime_before

        # Test the zipfile store. Zipping is still awkward...
        zf = osp.join(temp_dir, f'{run_id}.zip')
        strax.ZipDirectory.zip_dir(temp_dir, zf, delete=True)
        assert osp.exists(zf)

        ctx = strax.Context(storage=strax.ZipDirectory(temp_dir),
                            register=[Records, Peaks])
        metadata_2 = ctx.get_meta(run_id, 'peaks')
        assert metadata == metadata_2
# Example 3
    def _find(self, key, write, allow_incomplete, fuzzy_for,
              fuzzy_for_options):
        """Locate the backend key for *key* inside its zip archive.

        Writing is unsupported (asserted), and fuzzy matching inside
        zipfiles is not implemented yet.
        """
        assert not write

        # Check exact match / write case: the data exists iff its
        # metadata member is present in the archive.
        backend_key = self._backend_key(key)
        dirname = str(key)
        prefix = strax.dirname_to_prefix(dirname)
        metadata_member = f'{dirname}/{prefix}-metadata.json'

        with zipfile.ZipFile(self._zipname(key)) as zp:
            try:
                zp.getinfo(metadata_member)
            except KeyError:
                # No exact match; without fuzzy options, nothing to try.
                if not len(fuzzy_for) and not len(fuzzy_for_options):
                    raise strax.DataNotAvailable
            else:
                return backend_key

        raise NotImplementedError("Fuzzy matching within zipfiles not yet "
                                  "implemented")
# Example 4
    def test_raise_corruption(self):
        """Corrupted metadata must raise strax.DataCorrupted.

        Checks both a direct load of the target and building a dependent
        target ('peaks') on top of the corrupted data.
        """
        self.set_save_when('ALWAYS')
        self.st.make(self.test_run_id, self.target)
        assert self.is_stored()
        storage = self.st.storage[0]
        data_key = self.st.key_for(self.test_run_id, self.target)
        data_path = os.path.join(storage.path, str(data_key))
        assert os.path.exists(data_path)
        metadata = storage.backends[0].get_metadata(data_path)
        assert isinstance(metadata, dict)

        # Copied from FileSystemBackend (maybe abstractify the method separately?)
        prefix = strax.dirname_to_prefix(data_path)
        metadata_json = f'{prefix}-metadata.json'
        md_path = os.path.join(data_path, metadata_json)
        assert os.path.exists(md_path)

        # Corrupt the metadata (making it non-JSON parsable) by appending
        # a plain-text line. Use a context manager so the file handle is
        # closed even if the write fails.
        with open(md_path, 'a') as md_file:
            md_file.write(
                'Adding a non-JSON line to the file to corrupt the metadata')

        # Now we should get an error since the metadata is corrupted
        with self.assertRaises(strax.DataCorrupted):
            self.st.get_array(self.test_run_id, self.target)

        # Also test the error is raised if we build a target that depends
        # on the corrupted data
        self.st.register(Peaks)
        with self.assertRaises(strax.DataCorrupted):
            self.st.get_array(self.test_run_id, 'peaks')

        # Cleanup if someone wants to re-use this self.st
        del self.st._plugin_class_registry['peaks']
# Example 5
 def _get_metadata(self, zipn_and_dirn):
     """Load the metadata JSON for one data directory inside a zip archive.

     :param zipn_and_dirn: tuple of (zip filename, directory name within it)
     :return: parsed metadata (as returned by json.loads)
     """
     zip_filename, dirname = zipn_and_dirn
     prefix = strax.dirname_to_prefix(dirname)
     member = f'{dirname}/{prefix}-metadata.json'
     with zipfile.ZipFile(zip_filename) as archive:
         with archive.open(member) as f:
             return json.loads(f.read())