Esempio n. 1
0
    def test_dataframe(self):
        """Round-trip the Titanic CSV content through ``PandasDbRepository``.

        Loads ``titanic.csv`` from the directory of this file, saves its
        content through a ``PandasDbRepository`` built from ``self.engine``
        and ``"titanic"``, loads it back from that repository and checks that
        the reloaded content equals the original.
        """
        csv_path = Path(os.path.dirname(__file__)) / "titanic.csv"
        source = DataFrameData.load(LocalFileRepository(csv_path))

        # Save the loaded content through the database-backed repository.
        table_repo = PandasDbRepository(self.engine, "titanic")
        DataFrameData(source.content, table_repo).save()

        # Read it back and compare with what came from the CSV file.
        reloaded = DataFrameData.load(table_repo)
        self.assertIsNotNone(reloaded.content)
        self.assertTrue(source.content.equals(reloaded.content))
Esempio n. 2
0
    def test_dataframe(self):
        """Save the Titanic CSV content to S3 and check it can be read back.

        Loads ``titanic.csv`` from the directory of this file, re-targets the
        loaded data at an ``S3FileRepository`` built from the credentials and
        URL stored on the test case, saves it, then loads it again from the
        same repository and checks that the result is not empty.
        """
        # Read the local file and save it to S3.
        csv_path = Path(os.path.dirname(__file__)) / "titanic.csv"
        data = DataFrameData.load(LocalFileRepository(csv_path))

        repo_s3 = S3FileRepository(self.access_key_id, self.secret_access_key,
                                   self.s3file_url)
        data.repository = repo_s3
        data.save()

        # Load the file back from S3.
        data2 = DataFrameData.load(repo_s3)
        # assertGreater reports the actual length when the check fails,
        # unlike assertTrue(len(...) > 0).
        self.assertGreater(len(data2.content), 0)
Esempio n. 3
0
    def main(self, ds):
        """Return a new ``DataSet`` holding the Titanic data.

        The incoming ``ds`` argument is not read. A fresh ``DataSet`` is
        created, the content of ``../titanic.csv`` (relative to the directory
        of this file) is stored in it under the key ``"titanic"``, and that
        new data set is returned.
        """
        csv_path = Path(os.path.dirname(__file__)) / "../titanic.csv"
        loaded = DataFrameData.load(LocalFileRepository(csv_path))

        result = DataSet()
        result.put("titanic", loaded)
        return result
    def table_init(self):
        """Save the Titanic CSV content through ``SqlAlchemyModelData``.

        Loads ``titanic.csv`` from the directory of this file, passes its
        content to a ``SqlAlchemyModelData`` created for the ``Titanic`` model
        on a repository built from ``self.engine``, and saves it.
        """
        csv_path = Path(os.path.dirname(__file__)) / "titanic.csv"
        frame_data = DataFrameData.load(LocalFileRepository(csv_path))

        model_data = SqlAlchemyModelData(
            SqlAlchemyRepository(self.engine), Titanic)
        model_data.update_dataframe(frame_data.content)
        model_data.save()
Esempio n. 5
0
def prepare_db(engine):
    """Reset the schema on ``engine`` and save the Titanic CSV content.

    Calls ``Base.metadata.drop_all`` followed by ``Base.metadata.create_all``
    on ``engine``, then loads ``../titanic.csv`` (relative to the directory
    of this file) and saves its content through a ``SqlAlchemyModelData``
    created for the ``Titanic`` model.
    """
    # Drop first, then create, so the load below starts from a fresh schema.
    metadata = Base.metadata
    metadata.drop_all(engine)
    metadata.create_all(engine)

    csv_path = Path(os.path.dirname(__file__)) / "../titanic.csv"
    frame_data = DataFrameData.load(LocalFileRepository(csv_path))

    model_data = SqlAlchemyModelData(SqlAlchemyRepository(engine), Titanic)
    model_data.update_dataframe(frame_data.content)
    model_data.save()
Esempio n. 6
0
    def test_dataframe(self):
        """Save the Titanic CSV content to a temporary file and check it exists.

        Loads ``titanic.csv`` from the directory of this file, re-targets the
        loaded data at a ``LocalFileRepository`` pointing into a temporary
        directory, saves it, and checks that a regular file was written at
        that path.
        """
        csv_path = Path(os.path.dirname(__file__)) / "titanic.csv"
        data = DataFrameData.load(LocalFileRepository(csv_path))

        # A private temporary directory (public tempfile API) gives a unique
        # target path that does not exist before save(), and removes the
        # written file once the block exits.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = Path(tmp_dir) / "titanic_copy"
            data.repository = LocalFileRepository(tmp_path)
            data.save()

            self.assertTrue(tmp_path.exists())
            self.assertTrue(tmp_path.is_file())
Esempio n. 7
0
        df.loc[df["Embarked"] == "C", "Embarked"] = 1
        df.loc[df["Embarked"] == "Q", "Embarked"] = 2

        ds = DataSet()
        ds.put("titanic", DataFrameData(df))
        return ds


if __name__ == "__main__":
    basicConfig(level=DEBUG)

    # Load the dataset
    ds = DataSet()
    repo = LocalFileRepository(
        Path(os.path.dirname(__file__)) / Path("../titanic.csv"))
    titanic_data = DataFrameData.load(repo)
    ds.put("titanic", titanic_data)

    # Show the data as loaded, before the graph is run
    print("## Original data")
    print(ds.get("titanic").content)

    # Process the data with Graph.
    # Processing order: fill missing Age -> encode sex -> encode port of
    # embarkation.
    # NOTE(review): the list passed as the second argument to append()
    # presumably names the preceding node(s) - confirm against Graph.
    graph = Graph()
    fill_age = graph.append(FillNaMedian("Age"))
    sex_to_code = graph.append(SexToCode(), [fill_age])
    graph.append(EmbarkedToCode(), [sex_to_code])
    ds = graph.run(ds)

    print("## Processed data")