def read_parquet(path, columns=None):
    """Load a parquet object from the file path, returning a DataFrame.

    Parameters
    ----------
    path : string
        File path
    columns : list, default=None
        If not None, only these columns will be read from the file.

    Returns
    -------
    DataFrame

    Examples
    --------
    >>> ks.read_parquet('data.parquet', columns=['name', 'gender'])  # doctest: +SKIP
    """
    if columns is not None:
        columns = list(columns)
    if columns is None or len(columns) > 0:
        sdf = default_session().read.parquet(path)
        if columns is not None:
            fields = [field.name for field in sdf.schema]
            cols = [col for col in columns if col in fields]
            if len(cols) > 0:
                sdf = sdf.select(cols)
            else:
                sdf = default_session().createDataFrame([], schema=StructType())
    else:
        sdf = default_session().createDataFrame([], schema=StructType())
    return DataFrame(sdf)
def attach_distributed_sequence_column(sdf, column_name):
    """
    This method attaches a Spark column that has a sequence in a distributed manner.
    This is equivalent to the column assigned when default index type 'distributed-sequence'.

    >>> sdf = ks.DataFrame(['a', 'b', 'c']).to_spark()
    >>> sdf = InternalFrame.attach_distributed_sequence_column(sdf, column_name="sequence")
    >>> sdf.show()  # doctest: +NORMALIZE_WHITESPACE
    +--------+---+
    |sequence|  0|
    +--------+---+
    |       0|  a|
    |       1|  b|
    |       2|  c|
    +--------+---+
    """
    if len(sdf.columns) > 0:
        try:
            jdf = sdf._jdf.toDF()

            sql_ctx = sdf.sql_ctx
            encoders = sql_ctx._jvm.org.apache.spark.sql.Encoders
            encoder = encoders.tuple(jdf.exprEnc(), encoders.scalaLong())

            jrdd = jdf.localCheckpoint(False).rdd().zipWithIndex()

            df = spark.DataFrame(
                sql_ctx.sparkSession._jsparkSession.createDataset(
                    jrdd, encoder).toDF(), sql_ctx)
            columns = df.columns
            return df.selectExpr(
                "`{}` as `{}`".format(columns[1], column_name),
                "`{}`.*".format(columns[0]))
        except py4j.protocol.Py4JError:
            if is_testing():
                raise
            return InternalFrame._attach_distributed_sequence_column(sdf, column_name)
    else:
        cnt = sdf.count()
        if cnt > 0:
            return default_session().range(cnt).toDF(column_name)
        else:
            return default_session().createDataFrame(
                [], schema=StructType().add(column_name, data_type=LongType(), nullable=False))
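For reference, the same sequence can be produced with public PySpark APIs alone. This is only a minimal sketch of the zipWithIndex idea, not the internal fast path above; the SparkSession and column names are assumptions made for the example.

from pyspark.sql import Row, SparkSession

spark = SparkSession.builder.getOrCreate()  # assumed session for this sketch
sdf = spark.createDataFrame([("a",), ("b",), ("c",)], ["value"])

# zipWithIndex() pairs each row with a 0-based index without collecting data to the driver.
indexed = sdf.rdd.zipWithIndex().map(
    lambda pair: Row(sequence=pair[1], **pair[0].asDict())).toDF()
indexed.show()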
def read_table(name: str) -> DataFrame:
    """
    Read a Spark table and return a DataFrame.

    Parameters
    ----------
    name : string
        Table name in Spark.

    Returns
    -------
    DataFrame

    See Also
    --------
    DataFrame.to_table
    read_delta
    read_parquet
    read_spark_io

    Examples
    --------
    >>> ks.range(1).to_table('%s.my_table' % db)
    >>> ks.read_table('%s.my_table' % db)
       id
    0   0
    """
    sdf = default_session().read.table(name)
    return DataFrame(sdf)
def get_option(key: str, default: Union[Any, _NoValueType] = _NoValue) -> Any:
    """
    Retrieves the value of the specified option.

    Parameters
    ----------
    key : str
        The key which should match a single option.
    default : object
        The default value if the option is not set yet. The value should be JSON serializable.

    Returns
    -------
    result : the value of the option

    Raises
    ------
    OptionError : if no such option exists and the default is not provided
    """
    _check_option(key)
    if default is _NoValue:
        default = _options_dict[key].default
    _options_dict[key].validate(default)

    return json.loads(default_session().conf.get(_key_format(key), default=json.dumps(default)))
def test_value_counts(self):
    if LooseVersion(pyspark.__version__) < LooseVersion("2.4") and \
            default_session().conf.get("spark.sql.execution.arrow.enabled") == "true":
        default_session().conf.set("spark.sql.execution.arrow.enabled", "false")
        try:
            self._test_value_counts()
        finally:
            default_session().conf.set("spark.sql.execution.arrow.enabled", "true")
        self.assertRaises(
            RuntimeError,
            lambda: ks.MultiIndex.from_tuples([('x', 'a'), ('x', 'b')]).value_counts())
    else:
        self._test_value_counts()
def reset_option(key: str) -> None:
    """
    Reset one option to its default value.

    Pass "all" as argument to reset all options.

    Parameters
    ----------
    key : str
        If specified, only this option will be reset.

    Returns
    -------
    None
    """
    _check_option(key)
    default_session().conf.unset(_key_format(key))
def set_option(key: str, value: Any) -> None:
    """
    Sets the value of the specified option.

    Parameters
    ----------
    key : str
        The key which should match a single option.
    value : object
        New value of option. The value should be JSON serializable.

    Returns
    -------
    None
    """
    _check_option(key, value)
    default_session().conf.set(_key_format(key), json.dumps(value))
def set_option(key: str, value: str) -> None:
    """
    Sets the value of the specified option.

    Parameters
    ----------
    key : str
        The key which should match a single option.
    value : object
        New value of option.

    Returns
    -------
    None
    """
    _check_option_key(key)
    default_session().conf.set(_key_format(key), value)
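The two set_option variants above differ only in whether the value is JSON-serialized before being written into the Spark conf. A minimal usage sketch of the public options API these helpers back; "compute.max_rows" is just an example key:

import databricks.koalas as ks

ks.set_option("compute.max_rows", 2000)    # stored in the underlying Spark session conf
print(ks.get_option("compute.max_rows"))   # 2000
ks.reset_option("compute.max_rows")        # falls back to the registered default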
def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):
    if LooseVersion(pyspark.__version__) < LooseVersion("2.4") and \
            default_session().conf.get("spark.sql.execution.arrow.enabled") == "true" and \
            isinstance(self, MultiIndex):
        raise RuntimeError("if you're using pyspark < 2.4, set conf "
                           "'spark.sql.execution.arrow.enabled' to 'false' "
                           "for using this function with MultiIndex")
    return super(MultiIndex, self).value_counts(
        normalize=normalize, sort=sort, ascending=ascending, bins=bins, dropna=dropna)
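When the RuntimeError above is raised, the workaround it describes is to toggle the Spark conf around the call. A minimal sketch, assuming an active session:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
spark.conf.set("spark.sql.execution.arrow.enabled", "false")
# ... call MultiIndex.value_counts() here ...
spark.conf.set("spark.sql.execution.arrow.enabled", "true")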
def from_pandas(pdf: pd.DataFrame) -> '_InternalFrame':
    """ Create an immutable DataFrame from pandas DataFrame.

    :param pdf: :class:`pd.DataFrame`
    :return: the created immutable DataFrame
    """
    columns = pdf.columns
    data_columns = [name_like_string(col) for col in columns]
    if isinstance(columns, pd.MultiIndex):
        column_index = columns.tolist()
    else:
        column_index = None
    column_index_names = columns.names

    index = pdf.index

    index_map = []  # type: List[IndexMap]
    if isinstance(index, pd.MultiIndex):
        if index.names is None:
            index_map = [(SPARK_INDEX_NAME_FORMAT(i), None)
                         for i in range(len(index.levels))]
        else:
            index_map = [
                (SPARK_INDEX_NAME_FORMAT(i) if name is None else name_like_string(name),
                 name if name is None or isinstance(name, tuple) else (name,))
                for i, name in enumerate(index.names)]
    else:
        name = index.name
        index_map = [(name_like_string(name)
                      if name is not None else SPARK_INDEX_NAME_FORMAT(0),
                      name if name is None or isinstance(name, tuple) else (name,))]

    index_columns = [index_column for index_column, _ in index_map]

    reset_index = pdf.reset_index()
    reset_index.columns = index_columns + data_columns
    schema = StructType([StructField(name_like_string(name), infer_pd_series_spark_type(col),
                                     nullable=bool(col.isnull().any()))
                         for name, col in reset_index.iteritems()])
    for name, col in reset_index.iteritems():
        dt = col.dtype
        if is_datetime64_dtype(dt) or is_datetime64tz_dtype(dt):
            continue
        reset_index[name] = col.replace({np.nan: None})
    sdf = default_session().createDataFrame(reset_index, schema=schema)
    return _InternalFrame(sdf=sdf, index_map=index_map,
                          column_index=column_index,
                          column_scols=[scol_for(sdf, col) for col in data_columns],
                          column_index_names=column_index_names)
def read_parquet(path, columns=None) -> DataFrame:
    """Load a parquet object from the file path, returning a DataFrame.

    Parameters
    ----------
    path : string
        File path
    columns : list, default=None
        If not None, only these columns will be read from the file.

    Returns
    -------
    DataFrame

    See Also
    --------
    DataFrame.to_parquet
    DataFrame.read_table
    DataFrame.read_delta
    DataFrame.read_spark_io

    Examples
    --------
    >>> ks.range(1).to_parquet('%s/read_spark_io/data.parquet' % path)
    >>> ks.read_parquet('%s/read_spark_io/data.parquet' % path, columns=['id'])
       id
    0   0
    """
    if columns is not None:
        columns = list(columns)
    if columns is None or len(columns) > 0:
        sdf = default_session().read.parquet(path)
        if columns is not None:
            fields = [field.name for field in sdf.schema]
            cols = [col for col in columns if col in fields]
            if len(cols) > 0:
                sdf = sdf.select(cols)
            else:
                sdf = default_session().createDataFrame([], schema=StructType())
    else:
        sdf = default_session().createDataFrame([], schema=StructType())
    return DataFrame(sdf)
def range(start: int,
          end: Optional[int] = None,
          step: int = 1,
          num_partitions: Optional[int] = None) -> DataFrame:
    """
    Create a DataFrame with some range of numbers.

    The resulting DataFrame has a single int64 column named `id`, containing elements in a range
    from ``start`` to ``end`` (exclusive) with step value ``step``. If only the first parameter
    (i.e. start) is specified, we treat it as the end value with the start value being 0.

    This is similar to the range function in SparkSession and is used primarily for testing.

    Parameters
    ----------
    start : int
        the start value (inclusive)
    end : int, optional
        the end value (exclusive)
    step : int, optional, default 1
        the incremental step
    num_partitions : int, optional
        the number of partitions of the DataFrame

    Returns
    -------
    DataFrame

    Examples
    --------
    When the first parameter is specified, we generate a range of values up till that number.

    >>> ks.range(5)
       id
    0   0
    1   1
    2   2
    3   3
    4   4

    When start, end, and step are specified:

    >>> ks.range(start = 100, end = 200, step = 20)
        id
    0  100
    1  120
    2  140
    3  160
    4  180
    """
    sdf = default_session().range(start=start, end=end, step=step, numPartitions=num_partitions)
    return DataFrame(sdf)
def read_parquet(path, columns=None):
    """Load a parquet object from the file path, returning a DataFrame.

    :param path: File path
    :param columns: If not None, only these columns will be read from the file.
    :return: :class:`DataFrame`
    """
    if columns is not None:
        columns = list(columns)
    if columns is None or len(columns) > 0:
        sdf = default_session().read.parquet(path)
        if columns is not None:
            fields = [field.name for field in sdf.schema]
            cols = [col for col in columns if col in fields]
            if len(cols) > 0:
                sdf = sdf.select(cols)
            else:
                sdf = default_session().createDataFrame([], schema=StructType())
    else:
        sdf = default_session().createDataFrame([], schema=StructType())
    return DataFrame(sdf)
def from_pandas(pdf: pd.DataFrame) -> "InternalFrame": """ Create an immutable DataFrame from pandas DataFrame. :param pdf: :class:`pd.DataFrame` :return: the created immutable DataFrame """ columns = pdf.columns data_columns = [name_like_string(col) for col in columns] if isinstance(columns, pd.MultiIndex): column_labels = columns.tolist() else: column_labels = [(col, ) for col in columns] column_label_names = [ name if name is None or isinstance(name, tuple) else (name, ) for name in columns.names ] index_names = [ name if name is None or isinstance(name, tuple) else (name, ) for name in pdf.index.names ] index_columns = [ SPARK_INDEX_NAME_FORMAT(i) for i in range(len(index_names)) ] pdf = pdf.copy() pdf.index.names = index_columns reset_index = pdf.reset_index() reset_index.columns = index_columns + data_columns schema = StructType([ StructField( name, infer_pd_series_spark_type(col), nullable=bool(col.isnull().any()), ) for name, col in reset_index.iteritems() ]) for name, col in reset_index.iteritems(): dt = col.dtype if is_datetime64_dtype(dt) or is_datetime64tz_dtype(dt): continue reset_index[name] = col.replace({np.nan: None}) sdf = default_session().createDataFrame(reset_index, schema=schema) return InternalFrame( spark_frame=sdf, index_spark_columns=[scol_for(sdf, col) for col in index_columns], index_names=index_names, column_labels=column_labels, data_spark_columns=[scol_for(sdf, col) for col in data_columns], column_label_names=column_label_names, )
def _init_from_pandas(self, pdf, *args):
    metadata = Metadata.from_pandas(pdf)
    reset_index = pdf.reset_index()
    reset_index.columns = metadata.all_fields
    schema = StructType([StructField(name, infer_pd_series_spark_type(col),
                                     nullable=bool(col.isnull().any()))
                         for name, col in reset_index.iteritems()])
    for name, col in reset_index.iteritems():
        dt = col.dtype
        if is_datetime64_dtype(dt) or is_datetime64tz_dtype(dt):
            continue
        reset_index[name] = col.replace({np.nan: None})
    self._init_from_spark(default_session().createDataFrame(reset_index, schema=schema),
                          metadata)
def sql(query: str) -> DataFrame:
    """
    Execute a SQL query and return the result as a Koalas DataFrame.

    Parameters
    ----------
    query : str
        the SQL query

    >>> ks.sql("select * from range(10) where id > 7")
       id
    0   8
    1   9
    """
    return DataFrame(default_session().sql(query))
def read_spark_io(path: Optional[str] = None, format: Optional[str] = None,
                  schema: Union[str, 'StructType'] = None, **options) -> DataFrame:
    """Load a DataFrame from a Spark data source.

    Parameters
    ----------
    path : string, optional
        Path to the data source.
    format : string, optional
        Specifies the input data source format. Some common ones are:

        - 'delta'
        - 'parquet'
        - 'orc'
        - 'json'
        - 'csv'
    schema : string or StructType, optional
        Input schema. If none, Spark tries to infer the schema automatically.
        The schema can either be a Spark StructType, or a DDL-formatted string like
        `col0 INT, col1 DOUBLE`.
    options : dict
        All other options passed directly into Spark's data source.

    See Also
    --------
    DataFrame.to_spark_io
    DataFrame.read_table
    DataFrame.read_delta
    DataFrame.read_parquet

    Examples
    --------
    >>> ks.range(1).to_spark_io('%s/read_spark_io/data.parquet' % path)
    >>> ks.read_spark_io(
    ...     '%s/read_spark_io/data.parquet' % path, format='parquet', schema='id long')
       id
    0   0
    """
    sdf = default_session().read.load(path=path, format=format, schema=schema, options=options)
    return DataFrame(sdf)
def from_pandas(pdf: pd.DataFrame) -> '_InternalFrame':
    """ Create an immutable DataFrame from pandas DataFrame.

    :param pdf: :class:`pd.DataFrame`
    :return: the created immutable DataFrame
    """
    data_columns = [str(col) for col in pdf.columns]
    index = pdf.index

    index_map = []  # type: List[IndexMap]
    if isinstance(index, pd.MultiIndex):
        if index.names is None:
            index_map = [('__index_level_{}__'.format(i), None)
                         for i in range(len(index.levels))]
        else:
            index_map = [('__index_level_{}__'.format(i) if name is None else name, name)
                         for i, name in enumerate(index.names)]
    else:
        index_map = [(index.name
                      if index.name is not None else '__index_level_0__', index.name)]

    index_columns = [index_column for index_column, _ in index_map]

    reset_index = pdf.reset_index()
    reset_index.columns = index_columns + data_columns
    schema = StructType([StructField(name, infer_pd_series_spark_type(col),
                                     nullable=bool(col.isnull().any()))
                         for name, col in reset_index.iteritems()])
    for name, col in reset_index.iteritems():
        dt = col.dtype
        if is_datetime64_dtype(dt) or is_datetime64tz_dtype(dt):
            continue
        reset_index[name] = col.replace({np.nan: None})
    sdf = default_session().createDataFrame(reset_index, schema=schema)
    return _InternalFrame(sdf=sdf, index_map=index_map, data_columns=data_columns)
def get_option(key: str, default: Union[str, _NoValueType] = _NoValue) -> str:
    """
    Retrieves the value of the specified option.

    Parameters
    ----------
    key : str
        The key which should match a single option.
    default : str
        The default value if the option is not set yet.

    Returns
    -------
    result : the value of the option

    Raises
    ------
    OptionError : if no such option exists and the default is not provided
    """
    _check_option_key(key)
    if default is _NoValue:
        default = _registered_options[key]
    return default_session().conf.get(_key_format(key), default=default)
def _model_udf(self):
    spark = default_session()
    return pyfunc.spark_udf(spark, model_uri=self._model_uri, result_type=self._return_type)
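A sketch of how such a model UDF is typically produced and applied with MLflow's public APIs (mlflow.pyfunc.spark_udf and mlflow.sklearn.log_model); the throwaway sklearn model and column names exist only for this example:

import mlflow
import mlflow.sklearn
from mlflow import pyfunc
from pyspark.sql import SparkSession
from sklearn.linear_model import LinearRegression

# Log a tiny model so there is a model URI to point the UDF at.
with mlflow.start_run() as run:
    model = LinearRegression().fit([[1.0], [2.0]], [1.0, 2.0])
    mlflow.sklearn.log_model(model, "model")

spark = SparkSession.builder.getOrCreate()
predict = pyfunc.spark_udf(spark, model_uri="runs:/%s/model" % run.info.run_id,
                           result_type="double")
sdf = spark.createDataFrame([(3.0,), (4.0,)], ["x"])
sdf.withColumn("prediction", predict("x")).show()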
def read_csv(path, header='infer', names=None, usecols=None,
             mangle_dupe_cols=True, parse_dates=False, comment=None):
    """Read CSV (comma-separated) file into DataFrame.

    Parameters
    ----------
    path : str
        The path string storing the CSV file to be read.
    header : int, list of int, default 'infer'
        Row number(s) to use as the column names, and the start of the data.
        Default behavior is to infer the column names: if no names are passed the behavior
        is identical to `header=0` and column names are inferred from the first line of the
        file, if column names are passed explicitly then the behavior is identical to
        `header=None`. Explicitly pass `header=0` to be able to replace existing names.
    names : array-like, optional
        List of column names to use. If file contains no header row, then you should
        explicitly pass `header=None`. Duplicates in this list will cause an error to be issued.
    usecols : list-like or callable, optional
        Return a subset of the columns. If list-like, all elements must either be
        positional (i.e. integer indices into the document columns) or strings that
        correspond to column names provided either by the user in names or inferred
        from the document header row(s).
        If callable, the callable function will be evaluated against the column names,
        returning names where the callable function evaluates to `True`.
    mangle_dupe_cols : bool, default True
        Duplicate columns will be specified as 'X0', 'X1', ... 'XN', rather than 'X' ... 'X'.
        Passing in False will cause data to be overwritten if there are duplicate names
        in the columns. Currently only `True` is allowed.
    parse_dates : boolean or list of ints or names or list of lists or dict, default `False`.
        Currently only `False` is allowed.
    comment : str, optional
        Indicates the line should not be parsed.

    Returns
    -------
    DataFrame

    See Also
    --------
    DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.

    Examples
    --------
    >>> ks.read_csv('data.csv')  # doctest: +SKIP
    """
    if mangle_dupe_cols is not True:
        raise ValueError("mangle_dupe_cols can only be `True`: %s" % mangle_dupe_cols)
    if parse_dates is not False:
        raise ValueError("parse_dates can only be `False`: %s" % parse_dates)

    if usecols is not None and not callable(usecols):
        usecols = list(usecols)
    if usecols is None or callable(usecols) or len(usecols) > 0:
        reader = default_session().read.option("inferSchema", "true")

        if header == 'infer':
            header = 0 if names is None else None
        if header == 0:
            reader.option("header", True)
        elif header is None:
            reader.option("header", False)
        else:
            raise ValueError("Unknown header argument {}".format(header))

        if comment is not None:
            if not isinstance(comment, str) or len(comment) != 1:
                raise ValueError("Only length-1 comment characters supported")
            reader.option("comment", comment)

        sdf = reader.csv(path)

        if header is None:
            sdf = sdf.selectExpr(*["`%s` as `%s`" % (field.name, i)
                                   for i, field in enumerate(sdf.schema)])
        if names is not None:
            names = list(names)
            if len(set(names)) != len(names):
                raise ValueError('Found non-unique column index')
            if len(names) != len(sdf.schema):
                raise ValueError('Names do not match the number of columns: %d' % len(names))
            sdf = sdf.selectExpr(*["`%s` as `%s`" % (field.name, name)
                                   for field, name in zip(sdf.schema, names)])

        if usecols is not None:
            if callable(usecols):
                cols = [field.name for field in sdf.schema if usecols(field.name)]
                missing = []
            elif all(isinstance(col, int) for col in usecols):
                cols = [field.name for i, field in enumerate(sdf.schema) if i in usecols]
                missing = [col for col in usecols
                           if col >= len(sdf.schema) or sdf.schema[col].name not in cols]
            elif all(isinstance(col, str) for col in usecols):
                cols = [field.name for field in sdf.schema if field.name in usecols]
                missing = [col for col in usecols if col not in cols]
            else:
                raise ValueError("'usecols' must either be list-like of all strings, "
                                 "all unicode, all integers or a callable.")
            if len(missing) > 0:
                raise ValueError('Usecols do not match columns, columns expected but not '
                                 'found: %s' % missing)

            if len(cols) > 0:
                sdf = sdf.select(cols)
            else:
                sdf = default_session().createDataFrame([], schema=StructType())
    else:
        sdf = default_session().createDataFrame([], schema=StructType())
    return DataFrame(sdf)
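A minimal usage sketch of the usecols path above; the temporary file location and column names are arbitrary and exist only for this example:

import pandas as pd
import databricks.koalas as ks

pd.DataFrame({"id": [1, 2], "name": ["a", "b"], "extra": [0, 0]}).to_csv(
    "/tmp/example.csv", index=False)
kdf = ks.read_csv("/tmp/example.csv", usecols=["id", "name"])  # the 'extra' column is dropped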
def setUpClass(cls):
    cls.spark = default_session()
    cls.spark.conf.set(SPARK_CONF_ARROW_ENABLED, True)
def setUpClass(cls):
    cls.spark = default_session()
    cls.spark.conf.set("spark.sql.execution.arrow.enabled", True)
# files to reuse in Read the Docs build
if "READTHEDOCS" not in os.environ:
    # Remove previously generated rst files. Ignore errors just in case it stops
    # generating whole docs.
    shutil.rmtree(
        "%s/reference/api" % os.path.dirname(os.path.abspath(__file__)), ignore_errors=True)
    try:
        os.mkdir("%s/reference/api" % os.path.dirname(os.path.abspath(__file__)))
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

# Lower the number of partitions to speed up documentation build
utils.default_session({"spark.sql.shuffle.partitions": "4"})


def gendoc():
    """Get releases from Github and generate reStructuredText files for release notes."""
    source_dir = os.path.dirname(os.path.abspath(__file__))
    whatsnew_dir = "%s/whatsnew" % source_dir

    # Read the Docs builds multiple times. To speed up, we don't delete the generated rst
    # files to reuse in Read the Docs build
    if "READTHEDOCS" in os.environ and os.path.isdir(whatsnew_dir):
        return

    dev_dir = "%s/../../dev" % os.path.dirname(os.path.abspath(__file__))
    spec = importlib.util.spec_from_file_location("gendoc", "%s/gendoc.py" % dev_dir)
def _init_from_pandas(self, pdf, *args):
    metadata = Metadata.from_pandas(pdf)
    reset_index = pdf.reset_index()
    reset_index.columns = metadata.all_fields
    self._init_from_spark(default_session().createDataFrame(reset_index),
                          metadata)
import logging
from distutils.version import LooseVersion

import pandas as pd
import pyarrow as pa
import matplotlib.pyplot as plt
from pyspark import __version__

from databricks import koalas
from databricks.koalas import utils

# Initialize Spark session that should be used in doctests or unittests.
# Delta requires Spark 2.4.2+. See
# https://github.com/delta-io/delta#compatibility-with-apache-spark-versions.
if LooseVersion(__version__) >= LooseVersion("3.0.0"):
    session = utils.default_session(
        {"spark.jars.packages": "io.delta:delta-core_2.12:0.1.0"})
elif LooseVersion(__version__) >= LooseVersion("2.4.2"):
    session = utils.default_session(
        {"spark.jars.packages": "io.delta:delta-core_2.11:0.1.0"})
else:
    session = utils.default_session()


@pytest.fixture(autouse=True)
def add_ks(doctest_namespace):
    doctest_namespace['ks'] = koalas


@pytest.fixture(autouse=True)
def add_pd(doctest_namespace):
    if os.getenv("PANDAS_VERSION", None) is not None:
import pandas as pd
import pyarrow as pa
import matplotlib.pyplot as plt
from pyspark import __version__

from databricks import koalas as ks
from databricks.koalas import utils

shared_conf = {"spark.sql.shuffle.partitions": "4"}
# Initialize Spark session that should be used in doctests or unittests.
# Delta requires Spark 2.4.2+. See
# https://github.com/delta-io/delta#compatibility-with-apache-spark-versions.
if LooseVersion(__version__) >= LooseVersion("3.0.0"):
    shared_conf["spark.jars.packages"] = "io.delta:delta-core_2.12:0.7.0"
    session = utils.default_session(shared_conf)
elif LooseVersion(__version__) >= LooseVersion("2.4.2"):
    shared_conf["spark.jars.packages"] = "io.delta:delta-core_2.11:0.6.1"
    session = utils.default_session(shared_conf)
else:
    session = utils.default_session(shared_conf)

if os.getenv("DEFAULT_INDEX_TYPE", "") != "":
    ks.options.compute.default_index_type = os.getenv("DEFAULT_INDEX_TYPE")


@pytest.fixture(scope="session", autouse=True)
def session_termination():
    yield
    # Share one session across all the tests. Repeating starting and stopping sessions and
    # contexts seems to cause a memory leak for an unknown reason in PySpark.
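utils.default_session accepts a dict of Spark configs like the ones assembled above. A minimal sketch of the equivalent plain SparkSession builder calls (the app name is arbitrary; the real helper may set additional defaults):

from pyspark.sql import SparkSession

conf = {"spark.sql.shuffle.partitions": "4"}
builder = SparkSession.builder.appName("koalas-tests")
for key, value in conf.items():
    builder = builder.config(key, value)
session = builder.getOrCreate()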
def sql(query: str, globals=None, locals=None, **kwargs) -> DataFrame:
    """
    Execute a SQL query and return the result as a Koalas DataFrame.

    This function also supports embedding Python variables (locals, globals, and parameters)
    in the SQL statement by wrapping them in curly braces. See examples section for details.

    In addition to the locals, globals and parameters, the function will also attempt
    to determine if the program currently runs in an IPython (or Jupyter) environment
    and to import the variables from this environment. The variables have the same
    precedence as globals.

    The following variable types are supported:

    - string
    - int
    - float
    - list, tuple, range of above types
    - Koalas DataFrame
    - Koalas Series
    - pandas DataFrame

    Parameters
    ----------
    query : str
        the SQL query
    globals : dict, optional
        the dictionary of global variables, if explicitly set by the user
    locals : dict, optional
        the dictionary of local variables, if explicitly set by the user
    kwargs
        other variables that the user may want to set manually that can be referenced in the query

    Returns
    -------
    Koalas DataFrame

    Examples
    --------

    Calling a built-in SQL function.

    >>> ks.sql("select * from range(10) where id > 7")
       id
    0   8
    1   9

    A query can also reference a local variable or parameter by wrapping them in curly braces:

    >>> bound1 = 7
    >>> ks.sql("select * from range(10) where id > {bound1} and id < {bound2}", bound2=9)
       id
    0   8

    You can also wrap a DataFrame with curly braces to query it directly. Note that when you do
    that, the indexes, if any, automatically become top level columns.

    >>> mydf = ks.range(10)
    >>> x = range(4)
    >>> ks.sql("SELECT * from {mydf} WHERE id IN {x}")
       id
    0   0
    1   1
    2   2
    3   3

    Queries can also be arbitrarily nested in functions:

    >>> def statement():
    ...     mydf2 = ks.DataFrame({"x": range(2)})
    ...     return ks.sql("SELECT * from {mydf2}")
    >>> statement()
       x
    0  0
    1  1

    Mixing Koalas and pandas DataFrames in a join operation. Note that the index is dropped.

    >>> ks.sql('''
    ...   SELECT m1.a, m2.b
    ...   FROM {table1} m1 INNER JOIN {table2} m2
    ...   ON m1.key = m2.key
    ...   ORDER BY m1.a, m2.b''',
    ...   table1=ks.DataFrame({"a": [1,2], "key": ["a", "b"]}),
    ...   table2=pd.DataFrame({"b": [3,4,5], "key": ["a", "b", "b"]}))
       a  b
    0  1  3
    1  2  4
    2  2  5

    Also, it is possible to query using Series.

    >>> myser = ks.Series({'a': [1.0, 2.0, 3.0], 'b': [15.0, 30.0, 45.0]})
    >>> ks.sql("SELECT * from {myser}")
                        0
    0     [1.0, 2.0, 3.0]
    1  [15.0, 30.0, 45.0]
    """
    if globals is None:
        globals = _get_ipython_scope()
    _globals = builtin_globals() if globals is None else dict(globals)
    _locals = builtin_locals() if locals is None else dict(locals)
    # The default choice is the globals
    _dict = dict(_globals)
    # The vars:
    _scope = _get_local_scope()
    _dict.update(_scope)
    # Then the locals
    _dict.update(_locals)
    # Highest order of precedence is the locals
    _dict.update(kwargs)
    return SQLProcessor(_dict, query, default_session()).execute()