Beispiel #1
0
    def main(self, ds):
        """Fill missing values of ``self.column`` with that column's median.

        The frame is read from the "titanic" entry of ``ds``; the filled
        column is written back onto that frame object and also returned in a
        new ``DataSet`` under the key ``fillna_<column>``.
        """
        frame = ds.get("titanic").content

        median_value = frame[self.column].median()
        frame[self.column] = frame[self.column].fillna(median_value)

        result = DataSet()
        result.put(f"fillna_{self.column}", DataFrameData(frame[self.column]))
        return result
Beispiel #2
0
    def main(self, ds):
        """Encode the "Sex" column of the "titanic" frame as integers.

        "male" is replaced by 0 and "female" by 1 on the frame object taken
        from ``ds``. The encoded column is returned in a new ``DataSet``
        under the key "sex_to_code".
        """
        frame = ds.get("titanic").content

        # Apply the replacements in a fixed order: male first, then female.
        for label, code in (("male", 0), ("female", 1)):
            frame.loc[frame["Sex"] == label, "Sex"] = code

        result = DataSet()
        result.put("sex_to_code", DataFrameData(frame["Sex"]))
        return result
Beispiel #3
0
    def main(self, ds):
        """Load ``../titanic.csv`` (relative to this file) into a new DataSet.

        The incoming ``ds`` argument is not read; a fresh ``DataSet`` holding
        the loaded data under the key "titanic" is returned.
        """
        csv_path = Path(os.path.dirname(__file__)) / Path("../titanic.csv")
        loaded = DataFrameData.load(LocalFileRepository(csv_path))

        result = DataSet()
        result.put("titanic", loaded)
        return result
Beispiel #4
0
    def main(self, ds):
        """Encode "Sex" as 0 (male) / 1 (female) and return the whole frame.

        The frame is taken from the "titanic" entry of ``ds``, updated via
        ``.loc``, and wrapped in a new ``DataSet`` under the same key
        "titanic".
        """
        frame = ds.get("titanic").content

        sex_codes = {"male": 0, "female": 1}
        for label, code in sex_codes.items():
            frame.loc[frame["Sex"] == label, "Sex"] = code

        result = DataSet()
        result.put("titanic", DataFrameData(frame))
        return result
Beispiel #5
0
    def main(self, ds):
        """Encode the "Embarked" column of the "titanic" frame as integers.

        Missing values are first filled with "S"; then "S", "C" and "Q" are
        replaced by 0, 1 and 2 respectively. The encoded column is returned
        in a new ``DataSet`` under the key "embarked_to_code".
        """
        frame = ds.get("titanic").content
        frame["Embarked"] = frame["Embarked"].fillna("S")

        # Replacement order matches the port codes: S -> 0, C -> 1, Q -> 2.
        for code, port in enumerate(("S", "C", "Q")):
            frame.loc[frame["Embarked"] == port, "Embarked"] = code

        result = DataSet()
        result.put("embarked_to_code", DataFrameData(frame["Embarked"]))
        return result
Beispiel #6
0
    def main(self, ds):
        """Encode the "Sex" column of the "titanic" frame as integers.

        "male" is replaced by 0 and "female" by 1. The encoded column is
        returned in a new ``DataSet`` under the key "sex_to_code".

        The assignments use ``.loc`` rather than chained indexing
        (``df["Sex"][mask] = ...``): the chained form assigns into an
        intermediate object, triggers ``SettingWithCopyWarning`` and does not
        update ``df`` when pandas Copy-on-Write is active.
        """
        df = ds.get("titanic").content
        df.loc[df["Sex"] == "male", "Sex"] = 0
        df.loc[df["Sex"] == "female", "Sex"] = 1

        rds = DataSet()
        rds.put("sex_to_code", DataFrameData(df["Sex"]))

        # Random 3-10 second pause kept from the original.
        # NOTE(review): presumably simulates a slow task - confirm.
        time.sleep(random.randint(3, 10))
        return rds
Beispiel #7
0
    def main(self, ds):
        """Rebuild the "titanic" frame from separately processed columns.

        The raw "Age", "Sex" and "Embarked" columns are dropped, and the
        contents of the "fillna_Age", "sex_to_code" and "embarked_to_code"
        entries of ``ds`` are joined on in that order. The result is returned
        in a new ``DataSet`` under the key "titanic_result".
        """
        merged = ds.get("titanic").content.drop(
            ["Age", "Sex", "Embarked"], axis=1)

        for key in ("fillna_Age", "sex_to_code", "embarked_to_code"):
            merged = merged.join(ds.get(key).content)

        result = DataSet()
        result.put("titanic_result", DataFrameData(merged))
        return result
Beispiel #8
0
    def main(self, ds):
        """Encode the "Embarked" column of the "titanic" frame as integers.

        Missing values are first filled with "S"; then "S", "C" and "Q" are
        replaced by 0, 1 and 2 respectively. The encoded column is returned
        in a new ``DataSet`` under the key "embarked_to_code".

        The assignments use ``.loc`` rather than chained indexing
        (``df["Embarked"][mask] = ...``): the chained form assigns into an
        intermediate object, triggers ``SettingWithCopyWarning`` and does not
        update ``df`` when pandas Copy-on-Write is active.
        """
        df = ds.get("titanic").content
        df["Embarked"] = df["Embarked"].fillna("S")
        df.loc[df["Embarked"] == "S", "Embarked"] = 0
        df.loc[df["Embarked"] == "C", "Embarked"] = 1
        df.loc[df["Embarked"] == "Q", "Embarked"] = 2

        rds = DataSet()
        rds.put("embarked_to_code", DataFrameData(df["Embarked"]))

        # Random 3-10 second pause kept from the original.
        # NOTE(review): presumably simulates a slow task - confirm.
        time.sleep(random.randint(3, 10))
        return rds
Beispiel #9
0
        df.loc[df["Embarked"] == "Q", "Embarked"] = 2

        ds = DataSet()
        ds.put("titanic", DataFrameData(df))
        return ds


if __name__ == "__main__":
    basicConfig(level=DEBUG)

    # Load the dataset from ../titanic.csv (relative to this file).
    source = DataSet()
    csv_path = Path(os.path.dirname(__file__)) / Path("../titanic.csv")
    source.put("titanic", DataFrameData.load(LocalFileRepository(csv_path)))

    # Show the data before any processing.
    print("## Original data")
    print(source.get("titanic").content)

    # Process with a Graph, in this order:
    # fill missing Age -> encode sex -> encode port of embarkation.
    graph = Graph()
    age_node = graph.append(FillNaMedian("Age"))
    sex_node = graph.append(SexToCode(), [age_node])
    graph.append(EmbarkedToCode(), [sex_node])
    processed = graph.run(source)

    print("## Processed data")
    print(processed.get("titanic").content)
Beispiel #10
0
    repo_s = SqlAlchemyRepository(engine)

    md = SqlAlchemyModelData(repo_s, Titanic)
    md.update_dataframe(titanic_data.content)
    md.save()


if __name__ == "__main__":
    basicConfig(level=DEBUG)

    # Load the dataset and prepare the database.
    engine = create_engine("sqlite:///example.sqlite3", echo=True)
    prepare_db(engine)

    model_data = SqlAlchemyModelData(SqlAlchemyRepository(engine), Titanic)

    # Collect every passenger id up front so each row can be fetched alone.
    model_data.query()
    passenger_ids = [row.PassengerId for row in model_data.content]

    # Example of pulling the data one row at a time into SQLAlchemy models,
    # processing it, and writing it back.
    for pid in passenger_ids:
        model_data.query(lambda q: q.filter(Titanic.PassengerId == pid))

        row_set = DataSet()
        row_set.put("titanic", model_data)
        for task_cls in (SexToCode, EmbarkedToCode):
            row_set = task_cls().main(row_set)
        row_set.save_all()