"""Module for compiling Python functions into Impala UDFs""" from __future__ import absolute_import, print_function import os import pkgutil import llvm.core as lc from numba import sigutils from numba.compiler import compile_extra, Flags from impala.util import warn_deprecate_ibis from impala.udf.target import ImpalaTargetContext from impala.udf.typing import impala_typing_context warn_deprecate_ibis("impyla's numba-compiled UDF") # functionality to compile Python UDFs into Impala-executable IR def udf(signature): def wrapper(pyfunc): udfobj = UDF(pyfunc, signature) return udfobj return wrapper class UDF(object): def __init__(self, pyfunc, signature): self.py_func = pyfunc
import csv import six from six.moves import map from six.moves import zip import pandas as pd from impala.context import ImpalaContext from impala.util import (as_pandas, _random_id, _py_to_sql_string, _get_table_schema_hack, warn_deprecate_ibis) from impala._sql_model import (_to_TableName, BaseTableRef, JoinTableRef, SelectItem, SelectStmt, UnionStmt, Literal, InlineView, TableName, Expr, _create_table, _create_table_as_select, LimitElement) warn_deprecate_ibis('BigDataFrame') # utilities def _numpy_dtype_to_impala_PrimitiveType(ty): """Convert numpy dtype to Impala type string. Used in converting pandas DataFrame to SQL/Impala """ # based on impl in pandas.io.sql.PandasSQLTable._sqlalchemy_type() if ty is datetime.date: # TODO: this might be wrong return 'TIMESTAMP' if pd.core.common.is_datetime64_dtype(ty): # TODO: this might be wrong
from __future__ import absolute_import, print_function

import os
import pkgutil

import llvm.core as lc
from numba import sigutils
from numba.compiler import compile_extra, Flags

from impala.util import warn_deprecate_ibis
from impala.udf.target import ImpalaTargetContext
from impala.udf.typing import impala_typing_context

warn_deprecate_ibis("impyla's numba-compiled UDF")


# functionality to compile Python UDFs into Impala-executable IR

def udf(signature):
    """Return a decorator that wraps a Python function in a ``UDF``.

    The returned callable accepts the function to wrap and passes it,
    together with ``signature``, to the ``UDF`` constructor; the resulting
    ``UDF`` object replaces the decorated function.
    """
    def wrapper(pyfunc):
        # Hand the function and the captured signature straight to UDF.
        return UDF(pyfunc, signature)

    return wrapper


class UDF(object):

    def __init__(self, pyfunc, signature):
        self.py_func = pyfunc
# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import from six import reraise from impala.util import _random_id, warn_deprecate_ibis from impala.dbapi import connect warn_deprecate_ibis('ImpalaContext') class ImpalaContext(object): def __init__(self, temp_dir=None, temp_db=None, nn_host=None, webhdfs_port=50070, hdfs_user=None, *args, **kwargs): # args and kwargs get passed directly into impala.dbapi.connect() suffix = _random_id(length=8) self._temp_dir = '/tmp/impyla-%s' % (suffix if temp_dir is None else temp_dir)
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import from six import reraise from impala.util import _random_id, warn_deprecate_ibis from impala.dbapi import connect warn_deprecate_ibis('ImpalaContext') class ImpalaContext(object): def __init__(self, temp_dir=None, temp_db=None, nn_host=None, webhdfs_port=50070, hdfs_user=None, *args, **kwargs): # args and kwargs get passed directly into impala.dbapi.connect() suffix = _random_id(length=8) self._temp_dir = '/tmp/impyla-%s' % ( suffix if temp_dir is None else temp_dir) self._temp_db = 'tmp_impyla_%s' % ( suffix if temp_db is None else temp_db) self._conn = connect(*args, **kwargs) self._cursor = self._conn.cursor() # used for webhdfs cleanup of temp dir; not required
import six from six.moves import map from six.moves import zip import pandas as pd from impala.context import ImpalaContext from impala.util import (as_pandas, _random_id, _py_to_sql_string, _get_table_schema_hack, warn_deprecate_ibis) from impala._sql_model import (_to_TableName, BaseTableRef, JoinTableRef, SelectItem, SelectStmt, UnionStmt, Literal, InlineView, TableName, Expr, _create_table, _create_table_as_select, LimitElement) warn_deprecate_ibis('BigDataFrame') # utilities def _numpy_dtype_to_impala_PrimitiveType(ty): """Convert numpy dtype to Impala type string. Used in converting pandas DataFrame to SQL/Impala """ # based on impl in pandas.io.sql.PandasSQLTable._sqlalchemy_type() if ty is datetime.date: # TODO: this might be wrong return 'TIMESTAMP' if pd.core.common.is_datetime64_dtype(ty): # TODO: this might be wrong
# limitations under the License. from __future__ import absolute_import, division import struct import six from six.moves import range import numpy as np from sklearn.base import BaseEstimator from impala.blob import BlobStore from impala.util import create_view_from_query, drop_view, warn_deprecate_ibis warn_deprecate_ibis('impyla sklearn') # TO CREATE A NEW ESTIMATOR: # # 1. Subclass ImpalaEstimator # # 2. Override _uda_name() to return the name of the registered UDA for this # estimator. The UDA should have a signature like so: # # uda(prev_model, ) # # 3. Override _parameter_list() to return a string that is a comma-separated # list of the expected parameter values to add to the end of the UDA call. No # parameters means you should return an empty string ''. #
doing. It also allows you to supply the name of a UDF that will decode ASCII- encoded data (e.g., Base64). This command also makes it easier to generate the necessary SQL to distribute the side data around to other queries (e.g., cross-join). A typical use case would be to store model parameters as binary data for UDFs. """ from __future__ import absolute_import import six from impala.util import (_py_to_sql_string, _get_table_schema_hack, _gen_safe_random_table_name, warn_deprecate_ibis) warn_deprecate_ibis('BlobStore') class BlobStore(object): def __init__(self, ic, name=None): self._ic = ic self._name = name if self._name is None: # TODO: this should take the db name into account when generating table_name = _gen_safe_random_table_name(ic._cursor, prefix='blob') self._name = "%s.%s" % (self._ic._temp_db, table_name) self._create_blob_table() self._validate_schema() @property