Ejemplo n.º 1
0
"""Module for compiling Python functions into Impala UDFs"""

from __future__ import absolute_import, print_function

import os
import pkgutil

import llvm.core as lc
from numba import sigutils
from numba.compiler import compile_extra, Flags

from impala.util import warn_deprecate_ibis
from impala.udf.target import ImpalaTargetContext
from impala.udf.typing import impala_typing_context

# Runs once at import time: passes this feature's name to
# warn_deprecate_ibis (imported from impala.util above).
# NOTE(review): presumably emits a deprecation notice pointing users to
# Ibis -- confirm against impala.util.
warn_deprecate_ibis("impyla's numba-compiled UDF")

# functionality to compile Python UDFs into Impala-executable IR


def udf(signature):
    """Build a decorator that turns a Python function into a ``UDF``.

    ``udf(signature)`` returns a one-argument callable; applying it to
    ``pyfunc`` yields ``UDF(pyfunc, signature)``, so a name decorated with
    ``@udf(signature)`` is bound to the ``UDF`` object rather than to the
    original function.
    """
    def wrapper(pyfunc):
        # Construct and hand back the UDF directly.
        return UDF(pyfunc, signature)

    return wrapper


class UDF(object):
    def __init__(self, pyfunc, signature):
        self.py_func = pyfunc
Ejemplo n.º 2
0
import csv
import six
from six.moves import map
from six.moves import zip

import pandas as pd
from impala.context import ImpalaContext

from impala.util import (as_pandas, _random_id, _py_to_sql_string,
                         _get_table_schema_hack, warn_deprecate_ibis)
from impala._sql_model import (_to_TableName, BaseTableRef, JoinTableRef,
                               SelectItem, SelectStmt, UnionStmt, Literal,
                               InlineView, TableName, Expr, _create_table,
                               _create_table_as_select, LimitElement)

# Runs once at import time: passes the 'BigDataFrame' feature name to
# warn_deprecate_ibis (imported from impala.util above).
# NOTE(review): presumably emits a deprecation notice pointing users to
# Ibis -- confirm against impala.util.
warn_deprecate_ibis('BigDataFrame')

# utilities


def _numpy_dtype_to_impala_PrimitiveType(ty):
    """Convert numpy dtype to Impala type string.

    Used in converting pandas DataFrame to SQL/Impala
    """
    # based on impl in pandas.io.sql.PandasSQLTable._sqlalchemy_type()
    if ty is datetime.date:
        # TODO: this might be wrong
        return 'TIMESTAMP'
    if pd.core.common.is_datetime64_dtype(ty):
        # TODO: this might be wrong
Ejemplo n.º 3
0
from __future__ import absolute_import, print_function

import os
import pkgutil

import llvm.core as lc
from numba import sigutils
from numba.compiler import compile_extra, Flags

from impala.util import warn_deprecate_ibis
from impala.udf.target import ImpalaTargetContext
from impala.udf.typing import impala_typing_context


# Runs once at import time: passes this feature's name to
# warn_deprecate_ibis (imported from impala.util above).
# NOTE(review): presumably emits a deprecation notice pointing users to
# Ibis -- confirm against impala.util.
warn_deprecate_ibis("impyla's numba-compiled UDF")


# functionality to compile Python UDFs into Impala-executable IR

def udf(signature):
    """Build a decorator that turns a Python function into a ``UDF``.

    ``udf(signature)`` returns a one-argument callable; applying it to
    ``pyfunc`` yields ``UDF(pyfunc, signature)``, so a name decorated with
    ``@udf(signature)`` is bound to the ``UDF`` object rather than to the
    original function.
    """
    def wrapper(pyfunc):
        # Construct and hand back the UDF directly.
        return UDF(pyfunc, signature)
    return wrapper


class UDF(object):

    def __init__(self, pyfunc, signature):
        self.py_func = pyfunc
Ejemplo n.º 4
0
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

from six import reraise

from impala.util import _random_id, warn_deprecate_ibis
from impala.dbapi import connect

# Runs once at import time: passes the 'ImpalaContext' feature name to
# warn_deprecate_ibis (imported from impala.util above).
# NOTE(review): presumably emits a deprecation notice pointing users to
# Ibis -- confirm against impala.util.
warn_deprecate_ibis('ImpalaContext')


class ImpalaContext(object):
    def __init__(self,
                 temp_dir=None,
                 temp_db=None,
                 nn_host=None,
                 webhdfs_port=50070,
                 hdfs_user=None,
                 *args,
                 **kwargs):
        # args and kwargs get passed directly into impala.dbapi.connect()
        suffix = _random_id(length=8)
        self._temp_dir = '/tmp/impyla-%s' % (suffix
                                             if temp_dir is None else temp_dir)
Ejemplo n.º 5
0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

from six import reraise

from impala.util import _random_id, warn_deprecate_ibis
from impala.dbapi import connect


# Runs once at import time: passes the 'ImpalaContext' feature name to
# warn_deprecate_ibis (imported from impala.util above).
# NOTE(review): presumably emits a deprecation notice pointing users to
# Ibis -- confirm against impala.util.
warn_deprecate_ibis('ImpalaContext')


class ImpalaContext(object):

    def __init__(self, temp_dir=None, temp_db=None, nn_host=None,
                 webhdfs_port=50070, hdfs_user=None, *args, **kwargs):
        # args and kwargs get passed directly into impala.dbapi.connect()
        suffix = _random_id(length=8)
        self._temp_dir = '/tmp/impyla-%s' % (
            suffix if temp_dir is None else temp_dir)
        self._temp_db = 'tmp_impyla_%s' % (
            suffix if temp_db is None else temp_db)
        self._conn = connect(*args, **kwargs)
        self._cursor = self._conn.cursor()
        # used for webhdfs cleanup of temp dir; not required
Ejemplo n.º 6
0
import six
from six.moves import map
from six.moves import zip

import pandas as pd
from impala.context import ImpalaContext

from impala.util import (as_pandas, _random_id, _py_to_sql_string,
                         _get_table_schema_hack, warn_deprecate_ibis)
from impala._sql_model import (_to_TableName, BaseTableRef, JoinTableRef,
                               SelectItem, SelectStmt, UnionStmt, Literal,
                               InlineView, TableName, Expr, _create_table,
                               _create_table_as_select, LimitElement)


# Runs once at import time: passes the 'BigDataFrame' feature name to
# warn_deprecate_ibis (imported from impala.util above).
# NOTE(review): presumably emits a deprecation notice pointing users to
# Ibis -- confirm against impala.util.
warn_deprecate_ibis('BigDataFrame')


# utilities

def _numpy_dtype_to_impala_PrimitiveType(ty):
    """Convert numpy dtype to Impala type string.

    Used in converting pandas DataFrame to SQL/Impala
    """
    # based on impl in pandas.io.sql.PandasSQLTable._sqlalchemy_type()
    if ty is datetime.date:
        # TODO: this might be wrong
        return 'TIMESTAMP'
    if pd.core.common.is_datetime64_dtype(ty):
        # TODO: this might be wrong
Ejemplo n.º 7
0
# limitations under the License.

from __future__ import absolute_import, division

import struct
import six
from six.moves import range

import numpy as np
from sklearn.base import BaseEstimator

from impala.blob import BlobStore
from impala.util import create_view_from_query, drop_view, warn_deprecate_ibis


# Runs once at import time: passes the 'impyla sklearn' feature name to
# warn_deprecate_ibis (imported from impala.util above).
# NOTE(review): presumably emits a deprecation notice pointing users to
# Ibis -- confirm against impala.util.
warn_deprecate_ibis('impyla sklearn')


# TO CREATE A NEW ESTIMATOR:
#
# 1. Subclass ImpalaEstimator
#
# 2. Override _uda_name() to return the name of the registered UDA for this
# estimator.  The UDA should have a signature like so:
#
#   uda(prev_model, )
#
# 3. Override _parameter_list() to return a string that is a comma-separated
# list of the expected parameter values to add to the end of the UDA call.  No
# parameters means you should return an empty string ''.
#
Ejemplo n.º 8
0
doing.  It also allows you to supply the name of a UDF that will decode ASCII-
encoded data (e.g., Base64).

This command also makes it easier to generate the necessary SQL to distribute
the side data around to other queries (e.g., cross-join).  A typical use case
would be to store model parameters as binary data for UDFs.
"""

from __future__ import absolute_import

import six

from impala.util import (_py_to_sql_string, _get_table_schema_hack,
                         _gen_safe_random_table_name, warn_deprecate_ibis)

# Runs once at import time: passes the 'BlobStore' feature name to
# warn_deprecate_ibis (imported from impala.util above).
# NOTE(review): presumably emits a deprecation notice pointing users to
# Ibis -- confirm against impala.util.
warn_deprecate_ibis('BlobStore')


class BlobStore(object):
    def __init__(self, ic, name=None):
        self._ic = ic
        self._name = name

        if self._name is None:
            # TODO: this should take the db name into account when generating
            # the random table name
            table_name = _gen_safe_random_table_name(ic._cursor, prefix='blob')
            self._name = "%s.%s" % (self._ic._temp_db, table_name)
            self._create_blob_table()
        self._validate_schema()

    @property