def matrix_stores():
    """Yield a CSV-backed matrix store twice: once cached, once not.

    A DataFrame built from ``DATA_DICT`` (indexed by ``MatrixStore.indices``)
    is written as a gzip-compressed CSV next to a YAML dump of ``METADATA``
    inside a temporary directory. A ``CSVMatrixStore`` named ``"df"`` is then
    created on top of a ``ProjectStorage`` rooted at that directory.

    Yields:
        The same ``CSVMatrixStore`` instance two times: first while its
        ``cache()`` context is active, then again after that context has
        exited.

    Note:
        The temporary directory is only removed once the generator is
        exhausted or closed, so the yielded store's files exist for as long
        as the caller is iterating.
    """
    frame = pd.DataFrame.from_dict(DATA_DICT).set_index(MatrixStore.indices)

    with tempfile.TemporaryDirectory() as workdir:
        storage = ProjectStorage(workdir)
        matrix_path = os.path.join(workdir, "df.csv.gz")
        metadata_path = os.path.join(workdir, "df.yaml")

        # Metadata goes to disk first, then the matrix itself.
        with open(metadata_path, "w") as metadata_file:
            yaml.dump(METADATA, metadata_file, default_flow_style=False)
        frame.to_csv(matrix_path, compression="gzip")

        store = CSVMatrixStore(storage, [], "df")

        # First pass: hand the store out while caching is switched on.
        with store.cache():
            yield store

        # Second pass: the cache context has exited, so (per the original
        # author's note) any cached data is expected to be gone and this
        # yield exercises the uncached code path.
        yield store
def matrix_stores():
    """Yield CSV- and HDF-backed matrix stores, cached and then uncached.

    A DataFrame built from ``DATA_DICT`` (indexed by ``entity_id``) is written
    to a temporary directory both as ``df.csv`` and as ``df.h5`` (under the
    HDF key ``"matrix"``), next to a YAML dump of ``METADATA``. One
    ``CSVMatrixStore`` and one ``HDFMatrixStore``, both named ``"df"``, are
    created on top of a ``ProjectStorage`` rooted at that directory.

    Yields:
        Four stores in this order: the CSV store and the HDF store while both
        ``cache()`` contexts are active, then the same CSV store and HDF
        store again after the contexts have exited.

    Raises:
        AssertionError: if the two stores do not report equal design
            matrices right after construction.

    Note:
        The temporary directory is only removed once the generator is
        exhausted or closed, so the backing files exist for as long as the
        caller is iterating.
    """
    df = pd.DataFrame.from_dict(DATA_DICT).set_index(["entity_id"])

    with tempfile.TemporaryDirectory() as tmpdir:
        project_storage = ProjectStorage(tmpdir)
        tmpcsv = os.path.join(tmpdir, "df.csv")
        tmpyaml = os.path.join(tmpdir, "df.yaml")
        tmphdf = os.path.join(tmpdir, "df.h5")

        with open(tmpyaml, "w") as outfile:
            yaml.dump(METADATA, outfile, default_flow_style=False)
        df.to_csv(tmpcsv)
        # Pass ``key`` by keyword: pandas deprecated positional arguments
        # other than ``path_or_buf`` for ``to_hdf`` and makes them
        # keyword-only in 3.0. The keyword form works on older versions too.
        df.to_hdf(tmphdf, key="matrix")

        csv = CSVMatrixStore(project_storage, [], "df")
        hdf = HDFMatrixStore(project_storage, [], "df")

        # Sanity check: both storage formats must expose the same data
        # before either one is handed to a caller.
        assert csv.design_matrix.equals(hdf.design_matrix)

        # First pass: hand the stores out while caching is switched on.
        with csv.cache(), hdf.cache():
            yield csv
            yield hdf

        # Second pass: the cache contexts have exited, so (per the original
        # author's note) cached data is expected to be gone and these two
        # yields exercise the uncached code path.
        yield csv
        yield hdf