def iris_vd(base):
    from verticapy.learn.datasets import load_iris

    iris = load_iris(cursor=base.cursor)
    iris.set_display_parameters(print_info=False)
    yield iris
    drop_table(name="public.iris", cursor=base.cursor)
Ejemplo n.º 2
0
def titanic_vd(base):
    from verticapy.learn.datasets import load_titanic

    titanic = load_titanic(cursor=base.cursor)
    titanic.set_display_parameters(print_info=False)
    yield titanic
    drop_table(name="public.titanic", cursor=base.cursor)
Ejemplo n.º 3
0
def winequality_vd(base):
    from verticapy.learn.datasets import load_winequality

    winequality = load_winequality(cursor=base.cursor)
    yield winequality
    with warnings.catch_warnings(record=True) as w:
        drop_table(name="public.winequality", cursor=base.cursor)
def iris_vd(base):
    from verticapy.learn.datasets import load_iris

    iris = load_iris(cursor=base.cursor)
    yield iris
    with warnings.catch_warnings(record=True) as w:
        drop_table(name="public.iris", cursor=base.cursor)
Ejemplo n.º 5
0
def market_vd(base):
    from verticapy.learn.datasets import load_market

    market = load_market(cursor=base.cursor)
    yield market
    with warnings.catch_warnings(record=True) as w:
        drop_table(name="public.market", cursor=base.cursor)
def smart_meters_vd(base):
    from verticapy.learn.datasets import load_smart_meters

    smart_meters = load_smart_meters(cursor=base.cursor)
    yield smart_meters
    with warnings.catch_warnings(record=True) as w:
        drop_table(name="public.smart_meters", cursor=base.cursor)
def tr_data_vd(base):
    base.cursor.execute("DROP TABLE IF EXISTS public.tr_data")
    base.cursor.execute(
        'CREATE TABLE IF NOT EXISTS public.tr_data(Id INT, transportation INT, gender VARCHAR, "owned cars" INT, cost VARCHAR, income CHAR(4))'
    )
    base.cursor.execute(
        "INSERT INTO tr_data VALUES (1, 0, 'Male', 0, 'Cheap', 'Low')")
    base.cursor.execute(
        "INSERT INTO tr_data VALUES (2, 0, 'Male', 1, 'Cheap', 'Med')")
    base.cursor.execute(
        "INSERT INTO tr_data VALUES (3, 1, 'Female', 1, 'Cheap', 'Med')")
    base.cursor.execute(
        "INSERT INTO tr_data VALUES (4, 0, 'Female', 0, 'Cheap', 'Low')")
    base.cursor.execute(
        "INSERT INTO tr_data VALUES (5, 0, 'Male', 1, 'Cheap', 'Med')")
    base.cursor.execute(
        "INSERT INTO tr_data VALUES (6, 1, 'Male', 0, 'Standard', 'Med')")
    base.cursor.execute(
        "INSERT INTO tr_data VALUES (7, 1, 'Female', 1, 'Standard', 'Med')")
    base.cursor.execute(
        "INSERT INTO tr_data VALUES (8, 2, 'Female', 1, 'Expensive', 'Hig')")
    base.cursor.execute(
        "INSERT INTO tr_data VALUES (9, 2, 'Male', 2, 'Expensive', 'Med')")
    base.cursor.execute(
        "INSERT INTO tr_data VALUES (10, 2, 'Female', 2, 'Expensive', 'Hig')")
    base.cursor.execute("COMMIT")

    tr_data = vDataFrame(input_relation="public.tr_data", cursor=base.cursor)
    yield tr_data
    with warnings.catch_warnings(record=True) as w:
        drop_table(name="public.tr_data", cursor=base.cursor)
Ejemplo n.º 8
0
def titanic_vd(base):
    from verticapy.learn.datasets import load_titanic

    titanic = load_titanic(cursor=base.cursor)
    yield titanic
    with warnings.catch_warnings(record=True) as w:
        drop_table(name="public.titanic", cursor=base.cursor)
Ejemplo n.º 9
0
def winequality_vd(base):
    from verticapy.learn.datasets import load_winequality

    winequality = load_winequality(cursor=base.cursor)
    winequality.set_display_parameters(print_info=False)
    yield winequality
    drop_table(name="public.winequality", cursor=base.cursor)
Ejemplo n.º 10
0
def amazon_vd(base):
    from verticapy.learn.datasets import load_amazon

    amazon = load_amazon(cursor=base.cursor)
    yield amazon
    with warnings.catch_warnings(record=True) as w:
        drop_table(
            name="public.amazon", cursor=base.cursor,
        )
def smart_meters_vd(base):
    from verticapy.learn.datasets import load_smart_meters

    smart_meters = load_smart_meters(cursor=base.cursor)
    smart_meters.set_display_parameters(print_info=False)
    yield smart_meters
    drop_table(
        name="public.smart_meters", cursor=base.cursor,
    )
Ejemplo n.º 12
0
def iris_vd(base):
    from verticapy.learn.datasets import load_iris

    iris = load_iris(cursor=base.cursor)
    yield iris
    drop_table(
        name="public.iris",
        cursor=base.cursor,
    )
Ejemplo n.º 13
0
def amazon_vd(base):
    from verticapy.learn.datasets import load_amazon

    amazon = load_amazon(cursor=base.cursor)
    yield amazon
    drop_table(
        name="public.amazon",
        cursor=base.cursor,
    )
Ejemplo n.º 14
0
def amazon_vd(base):
    from verticapy.learn.datasets import load_amazon

    amazon = load_amazon(cursor=base.cursor)
    amazon.set_display_parameters(print_info=False)
    yield amazon
    drop_table(
        name="public.amazon", cursor=base.cursor,
    )
Ejemplo n.º 15
0
def world_vd(base):
    from verticapy.learn.datasets import load_world

    world = load_world(cursor=base.cursor)
    yield world
    with warnings.catch_warnings(record=True) as w:
        drop_table(
            name="public.world", cursor=base.cursor,
        )
Ejemplo n.º 16
0
def cities_vd(base):
    from verticapy.learn.datasets import load_cities

    cities = load_cities(cursor=base.cursor)
    yield cities
    with warnings.catch_warnings(record=True) as w:
        drop_table(
            name="public.cities", cursor=base.cursor,
        )
def market_vd(base):
    from verticapy.learn.datasets import load_market

    market = load_market(cursor=base.cursor)
    market.set_display_parameters(print_info=False)
    yield market
    drop_table(
        name="public.market", cursor=base.cursor,
    )
Ejemplo n.º 18
0
def bsk_data_vd(base):
    base.cursor.execute("DROP TABLE IF EXISTS public.bsk_data")
    base.cursor.execute(
        "CREATE TABLE IF NOT EXISTS public.bsk_data(Id INT, col1 FLOAT, col2 FLOAT, col3 FLOAT, col4 FLOAT)"
    )
    base.cursor.execute("INSERT INTO bsk_data VALUES (1, 7.2, 3.6, 6.1, 2.5)")
    base.cursor.execute("INSERT INTO bsk_data VALUES (2, 7.7, 2.8, 6.7, 2.0)")
    base.cursor.execute("INSERT INTO bsk_data VALUES (3, 7.7, 3.0, 6.1, 2.3)")
    base.cursor.execute("INSERT INTO bsk_data VALUES (4, 7.9, 3.8, 6.4, 2.0)")
    base.cursor.execute("INSERT INTO bsk_data VALUES (5, 4.4, 2.9, 1.4, 0.2)")
    base.cursor.execute("INSERT INTO bsk_data VALUES (6, 4.6, 3.6, 1.0, 0.2)")
    base.cursor.execute("INSERT INTO bsk_data VALUES (7, 4.7, 3.2, 1.6, 0.2)")
    base.cursor.execute("INSERT INTO bsk_data VALUES (8, 6.5, 2.8, 4.6, 1.5)")
    base.cursor.execute("INSERT INTO bsk_data VALUES (9, 6.8, 2.8, 4.8, 1.4)")
    base.cursor.execute("INSERT INTO bsk_data VALUES (10, 7.0, 3.2, 4.7, 1.4)")
    base.cursor.execute("COMMIT")

    bsk_data = vDataFrame(input_relation="public.bsk_data", cursor=base.cursor)
    yield bsk_data
    with warnings.catch_warnings(record=True) as w:
        drop_table(name="public.bsk_data", cursor=base.cursor)
Ejemplo n.º 19
0
 def test_read_shp(self, cities_vd):
     with warnings.catch_warnings(record=True) as w:
         drop_table(
             name="public.cities_test",
             cursor=cities_vd._VERTICAPY_VARIABLES_["cursor"],
         )
     cities_vd.to_shp("cities_test", "/home/dbadmin/", shape="Point")
     vdf = read_shp("/home/dbadmin/cities_test.shp",
                    cities_vd._VERTICAPY_VARIABLES_["cursor"])
     assert vdf.shape() == (202, 3)
     try:
         os.remove("/home/dbadmin/cities_test.shp")
         os.remove("/home/dbadmin/cities_test.shx")
         os.remove("/home/dbadmin/cities_test.dbf")
     except:
         pass
     with warnings.catch_warnings(record=True) as w:
         drop_table(
             name="public.cities_test",
             cursor=cities_vd._VERTICAPY_VARIABLES_["cursor"],
         )
def market_vd(base):
    from verticapy.learn.datasets import load_market

    market = load_market(cursor=base.cursor)
    yield market
    drop_table(name="public.market", cursor=base.cursor)
Ejemplo n.º 21
0
def titanic_vd(base):
    from verticapy.learn.datasets import load_titanic

    titanic = load_titanic(cursor=base.cursor)
    yield titanic
    drop_table(name="public.titanic", cursor=base.cursor)
Ejemplo n.º 22
0
def smart_meters_vd(base):
    from verticapy.learn.datasets import load_smart_meters

    smart_meters = load_smart_meters(cursor=base.cursor)
    yield smart_meters
    drop_table(name="public.smart_meters", cursor=base.cursor)
 def test_vDF_to_db(self, titanic_vd):
     try:
         drop_view("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"])
         drop_table("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"])
     except:
         pass
     # testing relation_type = view
     try:
         titanic_vd.copy().to_db(name="verticapy_titanic_tmp", usecols = ["age", "fare", "survived"], relation_type = "view", db_filter = "age > 40", nb_split = 3)
         titanic_tmp = vDataFrame("verticapy_titanic_tmp", cursor=titanic_vd._VERTICAPY_VARIABLES_["cursor"])
         assert titanic_tmp.shape() == (220, 4)
         assert titanic_tmp["_verticapy_split_"].min() == 0
         assert titanic_tmp["_verticapy_split_"].max() == 2
         titanic_vd._VERTICAPY_VARIABLES_["cursor"].execute("SELECT table_name FROM view_columns WHERE table_name = 'verticapy_titanic_tmp'")
         result = titanic_vd._VERTICAPY_VARIABLES_["cursor"].fetchone()
         assert result[0] == "verticapy_titanic_tmp"
     except:
         drop_view("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"])
         raise
     drop_view("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"])
     # testing relation_type = table
     try:
         titanic_vd.copy().to_db(name="verticapy_titanic_tmp", usecols = ["age", "fare", "survived"], relation_type = "table", db_filter = "age > 40", nb_split = 3)
         titanic_tmp = vDataFrame("verticapy_titanic_tmp", cursor=titanic_vd._VERTICAPY_VARIABLES_["cursor"])
         assert titanic_tmp.shape() == (220, 4)
         assert titanic_tmp["_verticapy_split_"].min() == 0
         assert titanic_tmp["_verticapy_split_"].max() == 2
         titanic_vd._VERTICAPY_VARIABLES_["cursor"].execute("SELECT table_name FROM columns WHERE table_name = 'verticapy_titanic_tmp'")
         result = titanic_vd._VERTICAPY_VARIABLES_["cursor"].fetchone()
         assert result[0] == "verticapy_titanic_tmp"
     except:
         drop_table("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"])
         raise
     drop_table("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"])
     # testing relation_type = temporary table
     try:
         titanic_vd.copy().to_db(name="verticapy_titanic_tmp", usecols = ["age", "fare", "survived"], relation_type = "temporary", db_filter = "age > 40", nb_split = 3)
         titanic_tmp = vDataFrame("verticapy_titanic_tmp", cursor=titanic_vd._VERTICAPY_VARIABLES_["cursor"])
         assert titanic_tmp.shape() == (220, 4)
         assert titanic_tmp["_verticapy_split_"].min() == 0
         assert titanic_tmp["_verticapy_split_"].max() == 2
         titanic_vd._VERTICAPY_VARIABLES_["cursor"].execute("SELECT table_name FROM columns WHERE table_name = 'verticapy_titanic_tmp'")
         result = titanic_vd._VERTICAPY_VARIABLES_["cursor"].fetchone()
         assert result[0] == "verticapy_titanic_tmp"
     except:
         drop_table("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"])
         raise
     drop_table("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"])
     # testing relation_type = temporary local table
     try:
         titanic_vd.copy().to_db(name="verticapy_titanic_tmp", usecols = ["age", "fare", "survived"], relation_type = "local", db_filter = "age > 40", nb_split = 3)
         titanic_tmp = vDataFrame("v_temp_schema.verticapy_titanic_tmp", cursor=titanic_vd._VERTICAPY_VARIABLES_["cursor"])
         assert titanic_tmp.shape() == (220, 4)
         assert titanic_tmp["_verticapy_split_"].min() == 0
         assert titanic_tmp["_verticapy_split_"].max() == 2
         titanic_vd._VERTICAPY_VARIABLES_["cursor"].execute("SELECT table_name FROM columns WHERE table_name = 'verticapy_titanic_tmp'")
         result = titanic_vd._VERTICAPY_VARIABLES_["cursor"].fetchone()
         assert result[0] == "verticapy_titanic_tmp"
     except:
         drop_table("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"])
         raise
     drop_table("verticapy_titanic_tmp", titanic_vd._VERTICAPY_VARIABLES_["cursor"])
Ejemplo n.º 24
0
def winequality_vd(base):
    from verticapy.learn.datasets import load_winequality

    winequality = load_winequality(cursor=base.cursor)
    yield winequality
    drop_table(name="public.winequality", cursor=base.cursor)
    def test_vDF_join(self, market_vd):
        # CREATE TABLE not_fresh AS SELECT * FROM market WHERE Form != 'Fresh';
        not_fresh = market_vd.search("Form != 'Fresh'")
        # CREATE TABLE not_dried AS SELECT * FROM market WHERE Form != 'Dried';
        not_dried = market_vd.search("Form != 'Dried'")

        # CREATE TABLE left_join AS
        #        SELECT a.Name as Name1, b.Name as Name2
        #        FROM not_fresh AS a LEFT JOIN not_dried AS b ON a.Form = b.Form;
        left_join = not_fresh.join(
            not_dried,
            how="left",
            on={"Form": "Form"},
            expr1=["Name AS Name1"],
            expr2=["Name AS Name2"],
        )
        assert left_join.shape() == (5886, 2)
        # SELECT COUNT(*) FROM left_join WHERE Name1 IS NULL;
        assert left_join["Name1"].count() == 5886
        # SELECT COUNT(*) FROM left_join WHERE Name2 IS NULL;
        assert left_join["Name2"].count() == 5886 - 30

        # CREATE TABLE right_join AS
        #        SELECT a.Name as Name1, b.Name as Name2
        #        FROM not_fresh AS a RIGHT JOIN not_dried AS b ON a.Form = b.Form;
        right_join = not_fresh.join(
            not_dried,
            how="right",
            on={"Form": "Form"},
            expr1=["Name AS Name1"],
            expr2=["Name AS Name2"],
        )
        assert right_join.shape() == (5946, 2)
        # SELECT COUNT(*) FROM right_join WHERE Name1 IS NULL;
        assert right_join["Name1"].count() == 5946 - 90
        # SELECT COUNT(*) FROM right_join WHERE Name2 IS NULL;
        assert right_join["Name2"].count() == 5946

        # CREATE TABLE full_join AS
        #        SELECT a.Name as Name1, b.Name as Name2
        #        FROM not_fresh AS a FULL OUTER JOIN not_dried AS b ON a.Form = b.Form;
        full_join = not_fresh.join(
            not_dried,
            how="full",
            on={"Form": "Form"},
            expr1=["Name AS Name1"],
            expr2=["Name AS Name2"],
        )
        assert full_join.shape() == (5976, 2)
        # SELECT COUNT(*) FROM full_join WHERE Name1 IS NULL;
        assert full_join["Name1"].count() == 5976 - 90
        # SELECT COUNT(*) FROM full_join WHERE Name2 IS NULL;
        assert full_join["Name2"].count() == 5976 - 30

        # CREATE TABLE inner_join AS
        #        SELECT a.Name as Name1, b.Name as Name2
        #        FROM not_fresh AS a INNER JOIN not_dried AS b ON a.Form = b.Form;
        inner_join = not_fresh.join(
            not_dried,
            how="inner",
            on={"Form": "Form"},
            expr1=["Name AS Name1"],
            expr2=["Name AS Name2"],
        )
        assert inner_join.shape() == (5856, 2)
        # SELECT COUNT(*) FROM inner_join WHERE Name1 IS NULL;
        assert inner_join["Name1"].count() == 5856
        # SELECT COUNT(*) FROM inner_join WHERE Name2 IS NULL;
        assert inner_join["Name2"].count() == 5856

        # CREATE TABLE natural_join AS
        #        SELECT a.Name as Name1, b.Name as Name2
        #        FROM not_fresh AS a NATURAL JOIN not_dried AS b;
        natural_join = not_fresh.join(
            not_dried, how="natural", expr1=["Name AS Name1"], expr2=["Name AS Name2"]
        )
        assert natural_join.shape() == (194, 2)
        # SELECT COUNT(*) FROM natural_join WHERE Name1 IS NULL;
        assert natural_join["Name1"].count() == 194
        # SELECT COUNT(*) FROM natural_join WHERE Name2 IS NULL;
        assert natural_join["Name2"].count() == 194

        # CREATE TABLE cross_join AS
        #        SELECT a.Name as Name1, b.Name as Name2
        #        FROM not_fresh AS a CROSS JOIN not_dried AS b;
        cross_join = not_fresh.join(
            not_dried, how="cross", expr1=["Name AS Name1"], expr2=["Name AS Name2"]
        )
        assert cross_join.shape() == (63616, 2)
        # SELECT COUNT(*) FROM cross_join WHERE Name1 IS NULL;
        assert cross_join["Name1"].count() == 63616
        # SELECT COUNT(*) FROM cross_join WHERE Name2 IS NULL;
        assert cross_join["Name2"].count() == 63616

        # join directly with a Vertica table
        not_dried.to_db("not_dried", relation_type="local")
        table_join = not_fresh.join(
            "v_temp_schema.not_dried",
            how="natural",
            expr1=["Name AS Name1"],
            expr2=["Name AS Name2"],
        )
        assert table_join.shape() == (194, 2)
        drop_table(
            "v_temp_schema.not_dried", not_dried._VERTICAPY_VARIABLES_["cursor"],
        )