Ejemplo n.º 1
0
    def main(self, ds):
        """Fill missing values of ``self.column`` with that column's median.

        Reads the ``titanic`` entry of ``ds``, replaces NaN values in the
        configured column with the column median, and writes the filled column
        back into that same frame.

        Args:
            ds: Input data set; must provide a ``titanic`` entry whose
                ``content`` is a DataFrame containing ``self.column``.

        Returns:
            A new ``DataSet`` with one entry, ``fillna_<column>``, wrapping the
            filled column.
        """
        col = self.column
        frame = ds.get("titanic").content

        # The median is computed before filling (pandas skips NaN by default).
        median = frame[col].median()
        # NOTE(review): this assigns into the frame obtained from ``ds``; if
        # ``content`` hands out the stored object, the input is modified too.
        frame[col] = frame[col].fillna(median)

        result = DataSet()
        result.put(f"fillna_{col}", DataFrameData(frame[col]))
        return result
Ejemplo n.º 2
0
    def main(self, ds):
        """Replace the ``Sex`` labels of the ``titanic`` frame with codes.

        ``"male"`` becomes ``0`` and ``"female"`` becomes ``1``; any other
        value is left as it is. The replacement is written into the frame
        obtained from ``ds``.

        Args:
            ds: Input data set; must provide a ``titanic`` entry whose
                ``content`` is a DataFrame with a ``Sex`` column.

        Returns:
            A new ``DataSet`` with one entry, ``sex_to_code``, wrapping the
            encoded ``Sex`` column.
        """
        frame = ds.get("titanic").content

        # Same order as before: "male" first, then "female".
        for label, code in (("male", 0), ("female", 1)):
            frame.loc[frame["Sex"] == label, "Sex"] = code

        result = DataSet()
        result.put("sex_to_code", DataFrameData(frame["Sex"]))
        return result
Ejemplo n.º 3
0
    def main(self, ds):
        """Encode the ``Sex`` column and return the whole ``titanic`` frame.

        ``"male"`` becomes ``0`` and ``"female"`` becomes ``1``; any other
        value is left as it is.

        Args:
            ds: Input data set; must provide a ``titanic`` entry whose
                ``content`` is a DataFrame with a ``Sex`` column.

        Returns:
            A new ``DataSet`` whose ``titanic`` entry wraps the full frame with
            the encoded ``Sex`` column.
        """
        frame = ds.get("titanic").content

        for label, code in (("male", 0), ("female", 1)):
            frame.loc[frame["Sex"] == label, "Sex"] = code

        # A fresh container is returned; the incoming one is not reused.
        out = DataSet()
        out.put("titanic", DataFrameData(frame))
        return out
Ejemplo n.º 4
0
    def main(self, ds):
        """Replace the ``Embarked`` labels of the ``titanic`` frame with codes.

        Missing values are first filled with ``"S"``; then ``"S"`` becomes
        ``0``, ``"C"`` becomes ``1`` and ``"Q"`` becomes ``2``. Any other value
        is left as it is.

        Args:
            ds: Input data set; must provide a ``titanic`` entry whose
                ``content`` is a DataFrame with an ``Embarked`` column.

        Returns:
            A new ``DataSet`` with one entry, ``embarked_to_code``, wrapping
            the encoded ``Embarked`` column.
        """
        frame = ds.get("titanic").content
        frame["Embarked"] = frame["Embarked"].fillna("S")

        # Labels are replaced one at a time, in the order S, C, Q.
        for label, code in (("S", 0), ("C", 1), ("Q", 2)):
            frame.loc[frame["Embarked"] == label, "Embarked"] = code

        result = DataSet()
        result.put("embarked_to_code", DataFrameData(frame["Embarked"]))
        return result
Ejemplo n.º 5
0
    def main(self, ds):
        """Replace the ``Sex`` labels of the ``titanic`` frame with codes.

        ``"male"`` becomes ``0`` and ``"female"`` becomes ``1``; any other
        value is left as it is. Before returning, the method sleeps for a
        random whole number of seconds between 3 and 10.

        Args:
            ds: Input data set; must provide a ``titanic`` entry whose
                ``content`` is a DataFrame with a ``Sex`` column.

        Returns:
            A new ``DataSet`` with one entry, ``sex_to_code``, wrapping the
            encoded ``Sex`` column.
        """
        df = ds.get("titanic").content

        # Use a single .loc assignment instead of chained indexing
        # (df["Sex"][mask] = ...). Chained assignment writes to an
        # intermediate Series; pandas warns about it and, with Copy-on-Write
        # enabled, the frame is never updated.
        df.loc[df["Sex"] == "male", "Sex"] = 0
        df.loc[df["Sex"] == "female", "Sex"] = 1

        rds = DataSet()
        rds.put("sex_to_code", DataFrameData(df["Sex"]))

        # NOTE(review): the random delay is kept as-is; presumably it simulates
        # a long-running task — confirm before removing.
        time.sleep(random.randint(3, 10))
        return rds
Ejemplo n.º 6
0
    def main(self, ds):
        """Combine the preprocessed columns with the ``titanic`` frame.

        Drops the ``Age``, ``Sex`` and ``Embarked`` columns from the
        ``titanic`` frame and joins, in order, the contents of the
        ``fillna_Age``, ``sex_to_code`` and ``embarked_to_code`` entries.

        Args:
            ds: Input data set; must provide the ``titanic``, ``fillna_Age``,
                ``sex_to_code`` and ``embarked_to_code`` entries.

        Returns:
            A new ``DataSet`` with one entry, ``titanic_result``, wrapping the
            joined frame.
        """
        merged = ds.get("titanic").content.drop(columns=["Age", "Sex", "Embarked"])

        # Each join uses DataFrame.join defaults (index-aligned left join).
        for key in ("fillna_Age", "sex_to_code", "embarked_to_code"):
            merged = merged.join(ds.get(key).content)

        result = DataSet()
        result.put("titanic_result", DataFrameData(merged))
        return result
Ejemplo n.º 7
0
    def main(self, ds):
        """Replace the ``Embarked`` labels of the ``titanic`` frame with codes.

        Missing values are first filled with ``"S"``; then ``"S"`` becomes
        ``0``, ``"C"`` becomes ``1`` and ``"Q"`` becomes ``2``. Any other value
        is left as it is. Before returning, the method sleeps for a random
        whole number of seconds between 3 and 10.

        Args:
            ds: Input data set; must provide a ``titanic`` entry whose
                ``content`` is a DataFrame with an ``Embarked`` column.

        Returns:
            A new ``DataSet`` with one entry, ``embarked_to_code``, wrapping
            the encoded ``Embarked`` column.
        """
        df = ds.get("titanic").content
        df["Embarked"] = df["Embarked"].fillna("S")

        # Use a single .loc assignment instead of chained indexing
        # (df["Embarked"][mask] = ...). Chained assignment writes to an
        # intermediate Series; pandas warns about it and, with Copy-on-Write
        # enabled, the frame is never updated.
        df.loc[df["Embarked"] == "S", "Embarked"] = 0
        df.loc[df["Embarked"] == "C", "Embarked"] = 1
        df.loc[df["Embarked"] == "Q", "Embarked"] = 2

        rds = DataSet()
        rds.put("embarked_to_code", DataFrameData(df["Embarked"]))

        # NOTE(review): the random delay is kept as-is; presumably it simulates
        # a long-running task — confirm before removing.
        time.sleep(random.randint(3, 10))
        return rds
Ejemplo n.º 8
0
    def test_dataframe(self):
        """Round-trip a DataFrame through the database repository.

        Loads ``titanic.csv`` from the directory of this file, saves its
        content through a ``PandasDbRepository`` bound to the ``titanic``
        name, loads it back, and checks that the reloaded content exists and
        equals the original.
        """
        csv_path = Path(os.path.dirname(__file__)) / "titanic.csv"
        source = DataFrameData.load(LocalFileRepository(csv_path))

        db_repo = PandasDbRepository(self.engine, "titanic")

        # Save the CSV content through the database-backed repository.
        DataFrameData(source.content, db_repo).save()

        # Load it back from the same repository and compare.
        reloaded = DataFrameData.load(db_repo)
        self.assertIsNotNone(reloaded.content)
        frames_match = source.content.equals(reloaded.content)
        self.assertTrue(frames_match)