Exemple #1
0
    def test_query_with_spacing(self):

        df = pd.DataFrame({
            "letter_pos": [i for i in range(len(string.ascii_letters))],
            "l2":
            list(string.ascii_letters)
        })

        df2 = pd.DataFrame({
            "letter_pos": [i for i in range(len(string.ascii_letters))],
            "letter":
            list(string.ascii_letters)
        })

        result = SQLDF(locals()).execute(
            "select a.*, b.letter from df a inner join df2 b on a.l2 = b.letter limit 20;"
        )
        self.assertEqual(len(result), 20)

        q = """
            select
            a.*
        from
            df a
        inner join
            df2 b
        on a.l2 = b.letter
        limit 20
        ;"""
        result = SQLDF(locals()).execute(q)
        self.assertEqual(len(result), 20)
Exemple #2
0
 def test_set_udf_method(self):
     sqldf = SQLDF(locals())
     conn = sqldf.conn
     self.default_df.to_sql("df", conn)
     sqldf._set_udf(self.default_udfs)
     self.assertEqual(list(conn.execute("select udf1(label) from df;")),
                      [("l1", ), ("l2", ), ("l3", )])
Exemple #3
0
 def test_set_udaf_method_with_agg_class(self):
     sqldf = SQLDF(locals())
     conn = sqldf.conn
     self.default_df.to_sql("df", conn)
     sqldf._set_udaf(self.default_udafs)
     self.assertEqual(list(conn.execute("select udaf1(label) from df;")),
                      [(3, )])
Exemple #4
0
 def test_execute_method_with_table_not_found(self):
     sqldf = SQLDF(self.default_env)
     self.assertRaises(Exception,
                       lambda: sqldf.execute("select * from notable"))
     # table deleted
     self.assertRaises(sqlite3.OperationalError,
                       lambda: sqldf.conn.execute("select * from df;"))
Exemple #5
0
 def test_execute_method_with_table_not_found(self):
     sqldf = SQLDF(self.default_env)
     self.assertRaises(
         Exception,
         lambda: sqldf.execute("select * from notable"))
     # table deleted
     self.assertRaises(sqlite3.OperationalError,
                       lambda: sqldf.conn.execute("select * from df;"))
Exemple #6
0
 def test_ensure_data_frame_method_tuple_of_list(self):
     data = ([1, 2, 3], [4, 5, 6])
     sqldf = SQLDF(locals())
     self.assertRaises(
         Exception,
         lambda: sqldf._ensure_data_frame(
             data,
             "df"))
Exemple #7
0
 def test_ensure_data_frame_method_nested_tuple(self):
     data = ((1, 2, 3), (4, 5, 6))
     sqldf = SQLDF(locals())
     self.assertRaises(
         Exception,
         lambda: sqldf._ensure_data_frame(
             data,
             "df"))
Exemple #8
0
 def test_execute_method(self):
     sqldf = SQLDF(self.default_env)
     query = "select * from df;"
     result = sqldf.execute(query)
     assert_frame_equal(result, self.default_df)
     # table deleted
     self.assertRaises(sqlite3.OperationalError,
                       lambda: sqldf.conn.execute(query))
Exemple #9
0
 def test_write_table_method_garbage_table(self):
     df = [[1, 2], [3, [4]]]
     sqldf = SQLDF(locals())
     self.assertRaises(Exception, lambda: sqldf._write_table("tbl", df))
     # table destroyed
     cursor = sqldf.conn.cursor()
     tablemaster = list(cursor.execute("select * from sqlite_master where type='table';"))
     self.assertEqual(tablemaster, [])
Exemple #10
0
 def test_write_table_method_garbage_table(self):
     df = [[1, 2], [3, [4]]]
     sqldf = SQLDF(locals())
     self.assertRaises(Exception, lambda: sqldf._write_table("tbl", df))
     # table destroyed
     cursor = sqldf.conn.cursor()
     tablemaster = list(
         cursor.execute("select * from sqlite_master where type='table';"))
     self.assertEqual(tablemaster, [])
Exemple #11
0
 def test_write_table_method(self):
     df = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"])
     sqldf = SQLDF(locals())
     sqldf._write_table("tbl", df)
     # table created
     cursor = sqldf.conn.cursor()
     sq_type, name, tbl_name, rootpage, sql = list(
         cursor.execute("select * from sqlite_master where type='table';"))[0]
     self.assertEqual(name, "tbl")
Exemple #12
0
 def test_execute_method(self):
     sqldf = SQLDF(self.default_env)
     query = "select * from df;"
     result = sqldf.execute(query)
     assert_frame_equal(result, self.default_df)
     # table deleted
     self.assertRaises(
         sqlite3.OperationalError,
         lambda: sqldf.conn.execute(query))
Exemple #13
0
 def test_set_udaf_method_with_agg_class(self):
     sqldf = SQLDF(locals())
     conn = sqldf.conn
     self.default_df.to_sql("df", conn)
     sqldf._set_udaf(self.default_udafs)
     self.assertEqual(
         list(
             conn.execute("select udaf1(label) from df;")), [
             (3,)])
Exemple #14
0
 def test_set_udf_method(self):
     sqldf = SQLDF(locals())
     conn = sqldf.conn
     self.default_df.to_sql("df", conn)
     sqldf._set_udf(self.default_udfs)
     self.assertEqual(
         list(
             conn.execute("select udf1(label) from df;")), [
             ("l1",), ("l2",), ("l3",)])
Exemple #15
0
 def test_del_table_method(self):
     sqldf = SQLDF(locals())
     cursor = sqldf.conn.cursor()
     # create table
     cursor.execute("create table deltbl(col);")
     sqldf._del_table(["deltbl"])
     self.assertEqual(
         list(
             cursor.execute(
                 "select * from sqlite_master where type='table';")), [])
Exemple #16
0
 def test_write_table_method(self):
     df = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"])
     sqldf = SQLDF(locals())
     sqldf._write_table("tbl", df)
     # table created
     cursor = sqldf.conn.cursor()
     sq_type, name, tbl_name, rootpage, sql = list(
         cursor.execute(
             "select * from sqlite_master where type='table';"))[0]
     self.assertEqual(name, "tbl")
Exemple #17
0
 def test_del_table_method(self):
     sqldf = SQLDF(locals())
     cursor = sqldf.conn.cursor()
     # create table
     cursor.execute("create table deltbl(col);")
     sqldf._del_table(["deltbl"])
     self.assertEqual(
         list(
             cursor.execute("select * from sqlite_master where type='table';")),
         [])
Exemple #18
0
    def test_set_udaf_method_with_agg_function(self):
        sqldf = SQLDF(locals())
        conn = sqldf.conn
        self.default_df.to_sql("df", conn)

        def agg_func(values):
            return len(values)

        sqldf._set_udaf({"mycount": agg_func})
        self.assertEqual(list(conn.execute("select mycount(label) from df;")),
                         [(3, )])
Exemple #19
0
    def test_set_udaf_method_with_agg_function(self):
        sqldf = SQLDF(locals())
        conn = sqldf.conn
        self.default_df.to_sql("df", conn)

        def agg_func(values):
            return len(values)
        sqldf._set_udaf({"mycount": agg_func})
        self.assertEqual(
            list(
                conn.execute("select mycount(label) from df;")), [
                (3,)])
Exemple #20
0
 def test_select(self):
     df = pd.DataFrame({
         "letter_pos": [i for i in range(len(string.ascii_letters))],
         "l2":
         list(string.ascii_letters)
     })
     result = SQLDF(locals()).execute("select * from df limit 10;")
     self.assertEqual(len(result), 10)
Exemple #21
0
 def test_destructor_with_inmemory_db(self):
     sqldf = SQLDF(self.default_env)
     conn = sqldf.conn
     self.assertRaises(sqlite3.OperationalError,
                       lambda: conn.execute("select * from tbl;"))
     sqldf = None  # destruct
     self.assertRaises(sqlite3.ProgrammingError,
                       lambda: conn.execute("select * from tbl;"))
Exemple #22
0
 def test_constructor_with_default(self):
     sqldf = SQLDF(self.default_env)
     self.assertEqual(isinstance(sqldf, SQLDF), True)
     self.assertEqual(sqldf.env, self.default_env)
     self.assertEqual(sqldf.inmemory, True)
     self.assertEqual(sqldf._dbname, ":memory:")
     self.assertEqual(sqldf.udfs, {})
     self.assertEqual(sqldf.udafs, {})
     self.assertEqual(isinstance(sqldf.conn, sqlite3.Connection), True)
Exemple #23
0
 def test_in(self):
     courseData = {
         "courseCode": ["TM351", "TU100", "M269"],
         "points": [30, 60, 30],
         "level": ["3", "1", "2"]
     }
     course_df = pd.DataFrame(courseData)
     q = "select * from course_df where courseCode in ( 'TM351', 'TU100' );"
     result = SQLDF(locals()).execute(q)
     self.assertEqual(len(result), 2)
Exemple #24
0
 def test_destructor_with_fs_db(self):
     sqldf = SQLDF(self.default_env, inmemory=False)
     conn = sqldf.conn
     self.assertRaises(sqlite3.OperationalError,
                       lambda: conn.execute("select * from tbl;"))
     self.assertEqual(os.path.exists(".pysqldf.db"), True)
     sqldf = None  # destruct
     self.assertRaises(sqlite3.ProgrammingError,
                       lambda: conn.execute("select * from tbl;"))
     self.assertEqual(os.path.exists(".pysqldf.db"), False)
Exemple #25
0
 def test_extract_table_names_method(self):
     sqldf = SQLDF(self.default_env)
     tablenames = {
         "select * from df;": ["df"],
         "select * from df": ["df"],
         "select * from _": ["_"],
         "select * from 11;": [],
         "select * from 1ab;": [],
         "select * from a-b;": [],
         "select * from a.b;": [],
         "select a;": [],
         "select * from (select * from subq_df) f;": ["subq_df"],
         "select * from df d1 inner join df2 d2 on d1.id = d2.id;": ["df", "df2"],
         "select a, b c from df where c in (select foo from df2 inner join df3 on df2.id = df3.id);": ["df", "df2", "df3"],
         "select * from df where a in (select a from (select c from df2 where c in (select a from df3 inner join df4 on df3.id = df4.id)));": ["df", "df2", "df3", "df4"]
     }
     for query, tablename in tablenames.items():
         self.assertEqual(
             set(sqldf._extract_table_names(query)), set(tablename))
Exemple #26
0
 def test_constructor_with_assign(self):
     sqldf = SQLDF(self.default_env,
                   inmemory=False,
                   udfs=self.default_udfs,
                   udafs=self.default_udafs)
     self.assertEqual(isinstance(sqldf, SQLDF), True)
     self.assertEqual(sqldf.env, self.default_env)
     self.assertEqual(sqldf.inmemory, False)
     self.assertEqual(sqldf._dbname, ".pysqldf.db")
     self.assertEqual(sqldf.udfs, self.default_udfs)
     self.assertEqual(sqldf.udafs, self.default_udafs)
     self.assertEqual(isinstance(sqldf.conn, sqlite3.Connection), True)
Exemple #27
0
    def test_udf(self):
        data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]

        def ten(x):
            return 10

        result = SQLDF(locals(), udfs={
            "ten": ten
        }).execute("SELECT ten(a) AS ten FROM data;")
        self.assertEqual(len(result), 2)
        self.assertEqual(list(result.columns), ["ten"])
        self.assertEqual(list(result.index), [0, 1])
        self.assertEqual(list(result["ten"]), [10, 10])
Exemple #28
0
 def test_extract_table_names_method(self):
     sqldf = SQLDF(self.default_env)
     tablenames = {
         "select * from df;": ["df"],
         "select * from df": ["df"],
         "select * from _": ["_"],
         "select * from 11;": [],
         "select * from 1ab;": [],
         "select * from a-b;": [],
         "select * from a.b;": [],
         "select a;": [],
         "select * from (select * from subq_df) f;": ["subq_df"],
         "select * from df d1 inner join df2 d2 on d1.id = d2.id;":
         ["df", "df2"],
         "select a, b c from df where c in (select foo from df2 inner join df3 on df2.id = df3.id);":
         ["df", "df2", "df3"],
         "select * from df where a in (select a from (select c from df2 where c in (select a from df3 inner join df4 on df3.id = df4.id)));":
         ["df", "df2", "df3", "df4"]
     }
     for query, tablename in tablenames.items():
         self.assertEqual(set(sqldf._extract_table_names(query)),
                          set(tablename))
Exemple #29
0
    def test_udaf(self):
        data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]

        class mycount(object):
            def __init__(self):
                super(mycount, self).__init__()
                self.count = 0

            def step(self, x):
                self.count += x

            def finalize(self):
                return self.count

        result = SQLDF(locals(), udafs={
            "mycount": mycount
        }).execute("select mycount(a) as mycount from data;")
        self.assertEqual(len(result), 1)
        self.assertEqual(list(result.columns), ["mycount"])
        self.assertEqual(list(result.index), [0])
        self.assertEqual(list(result["mycount"]), [1 + 4])
Exemple #30
0
    def test_in_with_subquery(self):
        programData = {
            "courseCode": [
                "TM351", "TM351", "TM351", "TU100", "TU100", "TU100", "M269",
                "M269", "M269"
            ],
            "programCode":
            ["AB1", "AB2", "AB3", "AB1", "AB3", "AB4", "AB3", "AB4", "AB5"]
        }
        program_df = pd.DataFrame(programData)

        courseData = {
            "courseCode": ["TM351", "TU100", "M269"],
            "points": [30, 60, 30],
            "level": ["3", "1", "2"]
        }
        course_df = pd.DataFrame(courseData)

        q = """
            select * from course_df where courseCode in ( select distinct courseCode from program_df ) ;
          """
        result = SQLDF(locals()).execute(q)
        self.assertEqual(len(result), 3)
Exemple #31
0
 def test_execute_method_with_table_not_found(self):
     sqldf = SQLDF(self.default_env)
     self.assertIsNone(sqldf.execute("select * from notable;"))
Exemple #32
0
 def test_execute_method_returning_none(self):
     births = load_births()
     result = SQLDF(locals()).execute(
         "select a from births limit 10;")  # col a not exists
     self.assertEqual(result, None)
Exemple #33
0
 def test_execute_method_with_query_error(self):
     sqldf = SQLDF(self.default_env)
     self.assertEqual(sqldf.execute("select a from df uuuuuu;"), None)
     # table deleted
     self.assertRaises(sqlite3.OperationalError,
                       lambda: sqldf.conn.execute("select * from df;"))
Exemple #34
0
 def test_del_table_method_not_exist_table(self):
     sqldf = SQLDF(locals())
     self.assertRaises(
         sqlite3.OperationalError,
         lambda: sqldf._del_table(
             ["deltblaaaaaaa"]))
Exemple #35
0
 def test_write_table_method_col_with_right_bracket(self):
     df = DataFrame([[1]], columns=["co)l"])
     sqldf = SQLDF(locals())
     self.assertRaises(Exception, lambda: sqldf._write_table("tbl", df))
Exemple #36
0
 def test_datetime_query(self):
     meat = load_meat()
     result = SQLDF(locals()).execute("select * from meat limit 10;")
     self.assertEqual(len(result), 10)
Exemple #37
0
 def test_db_in_fs(self):
     data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]
     sqldf = SQLDF(locals(), inmemory=False)
     self.assertEqual(os.path.exists(".pysqldf.db"), True)
     sqldf = None  # run GC
     self.assertEqual(os.path.exists(".pysqldf.db"), False)
Exemple #38
0
 def test_invalid_colname(self):
     data = [{"a": "valid", "(b)": "invalid"}]
     sqldf = SQLDF(locals())
     self.assertRaises(
         Exception,
         lambda: sqldf.execute("select * from data;"))
#-------------------------------------------------------------------------------
## change from scientific notation to decimal point in pandas
pd.set_option('display.float_format', lambda x: '%.0f' % x)

#-------------------------------------------------------------------------------
##
 #Limiting floats output to 3 decimal points
pd.set_option('display.float_format', lambda x: '{:.3f}'.format(x))
#-------------------------------------------------------------------------------
##Ignore warnings
import warnings; warnings.filterwarnings('ignore')

#-------------------------------------------------------------------------------
## Run SQL queries in Pandas
from pysqldf import SQLDF; sqldf = SQLDF(globals()); q = getattr(sqldf, 'execute')
import warnings; warnings.filterwarnings('ignore')

#-------------------------------------------------------------------------------
## Styling tables in Notebook
from IPython.display import HTML

def over_10M(value):
    if value > 10**7:
        color = 'green'
    else:
        color = 'blue'        
    return 'color: %s' % color

df.style.applymap(over_10M, subset=['AmountInUSD'])\
		.format({'AmountInUSD': "$ {:}"})
Exemple #40
0
 def test_execute_method_with_query_error(self):
     sqldf = SQLDF(self.default_env)
     self.assertEqual(sqldf.execute("select a from df uuuuuu;"), None)
     # table deleted
     self.assertRaises(sqlite3.OperationalError,
                       lambda: sqldf.conn.execute("select * from df;"))
Exemple #41
0
    def test_query_single_list(self):

        mylist = [i for i in range(10)]

        result = SQLDF(locals()).execute("select * from mylist")
        self.assertEqual(len(result), 10)
#!/opt/miniconda3/bin/python

import subprocess
import argparse
import math
from pysqldf import SQLDF
sqldf = SQLDF(globals())
import os
import pandas as pd
import numpy as np
from operator import itemgetter
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## Reading arguments...
parser = argparse.ArgumentParser()
parser.add_argument("--pointset",
                    "-p",
                    default="Datasets/Berlin/berlin0-10.tsv",
                    help="Pointset file")
parser.add_argument("--input",
                    "-i",
                    default="/tmp/NotFound.flocks",
                    help="Input file")
parser.add_argument("--no_save",
                    dest='save',
                    action='store_false',
                    help="Save trajectories and points?")
parser.add_argument("--no_send",
                    dest='send',
                    action='store_false',
Exemple #43
0
    def test_query_list_of_tuples(self):

        mylist = [tuple([i for i in range(10)]), tuple([i for i in range(10)])]

        result = SQLDF(locals()).execute("select * from mylist")
        self.assertEqual(len(result), 2)
Exemple #44
0
    >>> add_datepart(df, 'A')
    >>> df
        AYear AMonth AWeek ADay ADayofweek ADayofyear AIs_month_end AIs_month_start AIs_quarter_end AIs_quarter_start AIs_year_end AIs_year_start AElapsed
    0   2000  3      10    11   5          71         False         False           False           False             False        False          952732800
    1   2000  3      10    12   6          72         False         False           False           False             False        False          952819200
    2   2000  3      11    13   0          73         False         False           False           False             False        False          952905600
    """
    fld = df[fldname]
    if not np.issubdtype(fld.dtype, np.datetime64):
        df[fldname] = fld = pd.to_datetime(fld, infer_datetime_format=True)
    targ_pre = re.sub('[Dd]ate$', '', fldname)
    for n in ('Year', 'Month', 'Week', 'Day', 'Dayofweek', 'Dayofyear',
              'Is_month_end', 'Is_month_start', 'Is_quarter_end',
              'Is_quarter_start', 'Is_year_end', 'Is_year_start'):
        df[targ_pre + n] = getattr(fld.dt, n.lower())
    df[targ_pre + 'Elapsed'] = fld.astype(np.int64) // 10**9
    if drop: df.drop(fldname, axis=1, inplace=True)


#-------------------------------------------------------------------------------
from pysqldf import SQLDF
sqldf = SQLDF(globals())
q = getattr(sqldf, 'execute')
import warnings
warnings.filterwarnings('ignore')

# Select or exlcude particular data type of columns in Dataframe
melb_numeric_predictors = melb_predictors.select_dtypes(exclude=['object'])

# Aggregation of same column
df.groupby(['RateCodeID']).agg({'Fare_amount': ['mean', 'median']})
Exemple #45
0
 def test_subquery(self):
     kermit = pd.DataFrame({"x": range(10)})
     q = "select * from (select * from kermit) tbl limit 2;"
     result = SQLDF(locals()).execute(q)
     self.assertEqual(len(result), 2)