def test_database_from_dataframes():
    """Build a database from ``dfA`` and ``dfB`` and verify both tables.

    The database file lives in a temporary file for the duration of the
    test. Table "A" is created with "numbers" as its primary key and an
    index over ("numbers", "strings"); table "B" gets neither.
    """
    with NamedTemporaryFile(suffix="test.db") as tmp_file:
        connection = db_from_dataframes(
            db_filename=tmp_file.name,
            dataframes={"A": dfA, "B": dfB},
            primary_keys={"A": "numbers"},
            indices={"A": [("numbers", "strings")]},
            subdir="test_datacache",
        )

        # Table A should round-trip every row of dfA.
        rows_a = connection.execute("SELECT * FROM A").fetchall()
        eq_(rows_a, [(1, "a"), (2, "b"), (3, "c")])

        # Table B has a single column, so each row is a 1-tuple.
        rows_b = connection.execute("SELECT * FROM B").fetchall()
        eq_(rows_b, [("nuzzle",), ("ruzzle",)])
Beispiel #2
0
def test_database_from_dataframes():
    """Check that tables built from ``dfA`` and ``dfB`` can be read back.

    A temporary file backs the database. After creation, every row of
    tables "A" and "B" is selected and compared with the expected tuples.
    """
    with NamedTemporaryFile(suffix="test.db") as temp_db:
        build_kwargs = {
            "db_filename": temp_db.name,
            "dataframes": {"A": dfA, "B": dfB},
            # only table A declares a primary key and an index
            "primary_keys": {"A": "numbers"},
            "indices": {"A": [("numbers", "strings")]},
            "subdir": "test_datacache",
        }
        database = db_from_dataframes(**build_kwargs)

        expected_a = [(1, "a"), (2, "b"), (3, "c")]
        actual_a = database.execute("SELECT * FROM A").fetchall()
        eq_(actual_a, expected_a)

        expected_b = [("nuzzle",), ("ruzzle",)]
        actual_b = database.execute("SELECT * FROM B").fetchall()
        eq_(actual_b, expected_b)
Beispiel #3
0
    def _create_database(self, force=False):
        """Create the database from the GTF dataframe and return its connection.

        The dataframe returned by ``self.gtf.dataframe()`` is split by the
        values of its "feature" column. Each subset is handed to
        ``datacache.db_from_dataframes`` under that feature's name, together
        with the per-feature indices and, where one is found, a primary key.

        Parameters
        ----------
        force : bool
            Forwarded as the ``overwrite`` argument of
            ``datacache.db_from_dataframes``.

        Returns
        -------
        The object returned by ``datacache.db_from_dataframes``, which is
        also stored on ``self._connection``.
        """
        print("Creating database: %s" % self.local_db_path())
        db_filename = self.local_db_filename()
        gtf_df = self.gtf.dataframe()

        candidate_index_groups = self._all_possible_indices(gtf_df.columns)

        # feature name -> rows of the dataframe having that feature
        tables = {}
        # feature name -> primary key; only present when a key was found
        table_primary_keys = {}
        # feature name -> indices computed for that feature's rows
        table_indices = {}

        for feature_name in gtf_df["feature"].unique():
            feature_rows = gtf_df[gtf_df.feature == feature_name]
            tables[feature_name] = feature_rows

            key = self._get_primary_key(feature_name, feature_rows)
            if key:
                table_primary_keys[feature_name] = key

            # the key is passed along even when it is falsy
            table_indices[feature_name] = self._feature_indices(
                candidate_index_groups, key, feature_rows)

        self._connection = datacache.db_from_dataframes(
            db_filename=db_filename,
            dataframes=tables,
            indices=table_indices,
            primary_keys=table_primary_keys,
            subdir=CACHE_SUBDIR,
            overwrite=force,
            version=DATABASE_SCHEMA_VERSION,
        )
        return self._connection