Example #1
def _main():
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        prog="python -m sklearnex",
        description="""
            Run your Python script with Intel(R) Extension for
            scikit-learn, optimizing solvers of
            scikit-learn with Intel(R) oneAPI Data Analytics Library.
            """,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('-m',
                        action='store_true',
                        dest='module',
                        help="Executes following as a module")
    parser.add_argument('name', help="Script or module name")
    parser.add_argument('args',
                        nargs=argparse.REMAINDER,
                        help="Command line arguments")
    args = parser.parse_args()

    try:
        import sklearn
        from sklearnex import patch_sklearn
        patch_sklearn()
    except ImportError:
        print("Scikit-learn could not be imported. Nothing to patch")

    sys.argv = [args.name] + args.args
    if '_' + args.name in globals():
        return globals()['_' + args.name](*args.args)
    import runpy
    runf = runpy.run_module if args.module else runpy.run_path
    runf(args.name, run_name='__main__')
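The launcher above reduces to the basic patching pattern. A minimal standalone sketch, assuming sklearnex and scikit-learn are installed (KMeans is just an illustrative choice of estimator):

from sklearnex import patch_sklearn
patch_sklearn()

# Imports made after patching resolve to the accelerated implementations.
from sklearn.cluster import KMeans
print(KMeans.__module__)  # expected to start with 'daal4py' or 'sklearnex'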
Example #2
    def __init__(self, **kwargs):
        # Define word lists
        self.function_words_single = kwargs.get('function_words_single') or settings.FUNCTION_WORDS_SINGLE
        self.function_words = kwargs.get('function_words') or settings.FUNCTION_WORDS
        self.positive_words = kwargs.get('positive_words') or settings.POSITIVE_WORDS
        self.negative_words = kwargs.get('negative_words') or settings.NEGATIVE_WORDS
        self.speed_up = kwargs.get('speed_up', False)
        self.stop_words = self.function_words_single + self.positive_words + self.negative_words
        self.sentiment_words = self.positive_words + self.negative_words
        
        # Retain the svm function from the previous version
        self.svm = partial(self.shallow_classification, classifier="svm")

        # Specific paths for the course labs
        self.data_dir = kwargs.get('data_dir') or settings.DATA_DIR
        self.states_dir = kwargs.get('states_dir') or settings.STATES_DIR

        self.loader = ExternalFileLoader(data_dir=self.data_dir, states_dir=self.states_dir)
        self.settings = Settings()
        
        # Use the Intel scikit-learn speed-up (will increase memory usage)
        if self.speed_up:
            try:
                from sklearnex import patch_sklearn
                patch_sklearn()
            except ImportError:
                pass
                
        self.serializers = {"phrases": PhrasesSerializer,
               "w2v_embedding": W2vEmbeddingSerializer,
               "w2v_vocab": W2vVocabSerializer,
               "tfidf_model": TfIdfSerializer,
               "lda_model": LdaModelSerializer,
               "lda_dictionary": LdaDictionarySerializer}
Example #3
def _main():
    import argparse

    from sklearnex import patch_sklearn, unpatch_sklearn

    # Add a custom 'extend' action to support all Python versions
    class ExtendAction(argparse.Action):
        def __call__(self, parser, namespace, values, option_string=None):
            items = getattr(namespace, self.dest) or []
            items.extend(values)
            setattr(namespace, self.dest, items)

    parser = argparse.ArgumentParser(
        prog="python -m sklearnex.glob",
        description="""
            Patch all your Scikit-learn applications using Intel(R) Extension for
            scikit-learn.""",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.register('action', 'extend', ExtendAction)
    parser.add_argument('action', choices=["patch_sklearn", "unpatch_sklearn"],
                        help="Enable or Disable patching")
    parser.add_argument('--no-verbose', '-nv', action='store_false',
                        help="Disable additional information about enabling patching")
    parser.add_argument('--algorithm', '-a', action='extend', type=str, nargs="+",
                        help="The name of an algorithm to be patched globally")
    args = parser.parse_args()

    if args.action == "patch_sklearn":
        patch_sklearn(name=args.algorithm, verbose=args.no_verbose, global_patch=True)
    elif args.action == "unpatch_sklearn":
        unpatch_sklearn(global_unpatch=True)
    else:
        raise RuntimeError("Invalid choice for the action attribute."
                           " Expected: patch_sklearn or unpatch_sklearn."
                           f" Got {args.action}")
Example #4
def test_unpatch_by_list_many_estimators():
    import sklearnex

    sklearnex.patch_sklearn()

    from sklearn.ensemble import RandomForestRegressor
    from sklearn.neighbors import KNeighborsRegressor
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC

    assert RandomForestRegressor.__module__.startswith('daal4py')
    assert KNeighborsRegressor.__module__.startswith('daal4py')
    assert LogisticRegression.__module__.startswith('daal4py')
    assert SVC.__module__.startswith('daal4py') or SVC.__module__.startswith(
        'sklearnex')

    sklearnex.unpatch_sklearn(["KNeighborsRegressor", "RandomForestRegressor"])

    from sklearn.ensemble import RandomForestRegressor
    from sklearn.neighbors import KNeighborsRegressor
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC

    assert RandomForestRegressor.__module__.startswith('sklearn')
    assert KNeighborsRegressor.__module__.startswith('sklearn')
    assert LogisticRegression.__module__.startswith('daal4py')
    assert SVC.__module__.startswith('daal4py') or SVC.__module__.startswith(
        'sklearnex')
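The repeated imports in this test are deliberate: patching and unpatching swap the classes inside the sklearn modules, so a name imported earlier keeps pointing at the class it was bound to. A minimal sketch of the effect:

from sklearnex import patch_sklearn, unpatch_sklearn

patch_sklearn()
from sklearn.svm import SVC   # accelerated class
unpatch_sklearn()
# SVC still refers to the patched class here; re-import to pick up the change.
from sklearn.svm import SVC   # stock scikit-learn class again
print(SVC.__module__)         # expected to start with 'sklearn'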
Example #5
def test_monkey_patching():
    import sklearnex

    _tokens = sklearnex.get_patch_names()
    _values = sklearnex.get_patch_map().values()
    _classes = list()

    for v in _values:
        for c in v:
            _classes.append(c[0])

    sklearnex.patch_sklearn()

    for i, _ in enumerate(_tokens):
        t = _tokens[i]
        p = _classes[i][0]
        n = _classes[i][1]

        class_module = getattr(p, n).__module__
        assert \
            class_module.startswith('daal4py') or class_module.startswith('sklearnex'), \
            "Patching has completed with error."

    for i, _ in enumerate(_tokens):
        t = _tokens[i]
        p = _classes[i][0]
        n = _classes[i][1]

        sklearnex.unpatch_sklearn(t)
        class_module = getattr(p, n).__module__
        assert class_module.startswith('sklearn'), \
            "Unpatching has completed with error."

    sklearnex.unpatch_sklearn()

    for i, _ in enumerate(_tokens):
        t = _tokens[i]
        p = _classes[i][0]
        n = _classes[i][1]

        class_module = getattr(p, n).__module__
        assert class_module.startswith('sklearn'), \
            "Unpatching has completed with error."

    sklearnex.unpatch_sklearn()

    for i, _ in enumerate(_tokens):
        t = _tokens[i]
        p = _classes[i][0]
        n = _classes[i][1]

        sklearnex.patch_sklearn(t)

        class_module = getattr(p, n).__module__
        assert \
            class_module.startswith('daal4py') or class_module.startswith('sklearnex'), \
            "Patching has completed with error."

    sklearnex.unpatch_sklearn()
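The get_patch_names/get_patch_map pair driving this test is also useful on its own for discovering what can be patched. A minimal sketch:

import sklearnex

# List every algorithm name accepted by patch_sklearn / unpatch_sklearn.
for name in sklearnex.get_patch_names():
    print(name)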
Example #6
def test_monkey_patching():
    import sklearnex

    _tokens = sklearnex.get_patch_names()
    _values = sklearnex.get_patch_map().values()
    _classes = list()
    for v in _values:
        _classes.append(v[0][0])

    assert len(_tokens) == len(_classes)
    assert isinstance(_tokens, list) and len(_tokens) > 0, \
        "Internal error: the list of patched names has an unexpected format."

    sklearnex.patch_sklearn()

    for i, _ in enumerate(_tokens):
        t = _tokens[i]
        p = _classes[i][0]
        n = _classes[i][1]

        class_module = getattr(p, n).__module__
        assert class_module.startswith('daal4py'), \
            "Patching has completed with error."

        sklearnex.unpatch_sklearn(t)
        class_module = getattr(p, n).__module__
        assert class_module.startswith('sklearn'), \
            "Unpatching failed."

    sklearnex.unpatch_sklearn()

    for i, _ in enumerate(_tokens):
        t = _tokens[i]
        p = _classes[i][0]
        n = _classes[i][1]

        class_module = getattr(p, n).__module__
        assert class_module.startswith('sklearn'), \
            "Unpatching has completed with error."

        sklearnex.patch_sklearn(t)

        class_module = getattr(p, n).__module__
        assert class_module.startswith('daal4py'), \
            "Patching has completed with error."

    sklearnex.unpatch_sklearn()
Example #7
    def _get_model_type(self):
        if self.params_aux.get('use_daal', True):
            try:
                # TODO: Add a more granular switch; currently this affects all future
                # KNN models even if they had `use_daal=False`
                from sklearnex import patch_sklearn
                patch_sklearn("knn_classifier")
                patch_sklearn("knn_regressor")
                # The sklearnex backend for KNN seems to be 20-40x+ faster than
                # native sklearn with no downsides.
                logger.log(15, '\tUsing sklearnex KNN backend...')
            except ImportError:
                pass
        try:
            from ._knn_loo_variants import KNeighborsClassifier, KNeighborsRegressor
        except ImportError:
            from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
            logger.warning('WARNING: Leave-one-out variants of KNN failed to import. '
                           'Falling back to standard KNN implementations.')
        if self.problem_type == REGRESSION:
            return KNeighborsRegressor
        else:
            return KNeighborsClassifier
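As the snippet shows, patch_sklearn accepts a single algorithm name, which avoids patching all of scikit-learn. A minimal standalone sketch of the same selective patching:

from sklearnex import patch_sklearn

# Patch only the KNN estimators, leaving the rest of scikit-learn untouched.
patch_sklearn("knn_classifier")
patch_sklearn("knn_regressor")

from sklearn.neighbors import KNeighborsClassifier
print(KNeighborsClassifier.__module__)  # expected to start with 'daal4py' or 'sklearnex'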
Example #8
    def _get_model_type(self):
        penalty = self.params.get('penalty', 'L2')
        if self.params_aux.get('use_daal', False):
            # Disabled by default until more testing is done; appears to give a
            # 20x training speedup when enabled
            try:
                # TODO: Add a more granular switch; currently this affects all future
                # LR models even if they had `use_daal=False`
                from sklearnex import patch_sklearn
                patch_sklearn("ridge")
                patch_sklearn("lasso")
                patch_sklearn("logistic")
                logger.log(15, '\tUsing daal4py LR backend...')
            except ImportError:
                pass
        from sklearn.linear_model import LogisticRegression, Ridge, Lasso
        if self.problem_type == REGRESSION:
            if penalty == 'L2':
                model_type = Ridge
            elif penalty == 'L1':
                model_type = Lasso
            else:
                raise AssertionError(
                    f'Unknown value for penalty "{penalty}" - supported types are ["L1", "L2"]'
                )
        else:
            model_type = LogisticRegression
        return model_type
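As in Example #3, patch_sklearn also accepts a list of names, so the three calls above can be collapsed into one. A minimal sketch:

from sklearnex import patch_sklearn

# One call covering the three linear-model backends patched above.
patch_sklearn(name=["ridge", "lasso", "logistic"])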
Example #9
def parse_args(parser,
               size=None,
               loop_types=(),
               n_jobs_supported=True,
               prefix='sklearn'):
    '''
    Add common arguments useful for most benchmarks and parse.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        Parser to which the arguments should be added.
    size : tuple of int, optional
        Enable '--size' argument with this default size.
        If None (default), no '--size' argument will be added.
    loop_types : iterable of str, optional
        Add arguments like '--fit-inner-loops' and '--fit-outer-loops',
        useful for tweaking runtime of the benchmark.
    n_jobs_supported : bool
        If set to True, generate an n_jobs member in the argparse Namespace
        corresponding to the optimal n_jobs parameter for scikit-learn.
        Otherwise, n_jobs will be set to None.
    prefix : str, optional, default 'sklearn'
        The default prefix to report in the results

    Returns
    -------
    parser : argparse.ArgumentParser
        Parser to which the arguments were added.
        This is the same parser that was passed to this function.
    '''

    parser.add_argument('-n',
                        '--num-threads',
                        '--core-number',
                        default=-1,
                        dest='threads',
                        type=int,
                        help='Number of threads to use')
    parser.add_argument('-a',
                        '--arch',
                        default='?',
                        help='Machine architecture, for bookkeeping')
    parser.add_argument('-b',
                        '--batch',
                        '--batchID',
                        default='?',
                        help='Batch ID, for bookkeeping')
    parser.add_argument('-p',
                        '--prefix',
                        default=prefix,
                        help='Prefix string, for bookkeeping')
    parser.add_argument('-v',
                        '--verbose',
                        default=False,
                        action='store_true',
                        help='Output extra debug messages')
    parser.add_argument('--data-format',
                        type=str,
                        default='numpy',
                        choices=('numpy', 'pandas', 'cudf'),
                        help='Data format: numpy (default), pandas, cudf')
    parser.add_argument('--data-order',
                        type=str,
                        default='C',
                        choices=('C', 'F'),
                        help='Data order: C (row-major, default) or '
                        'F (column-major)')
    parser.add_argument('-d',
                        '--dtype',
                        type=np.dtype,
                        default=np.float64,
                        choices=(np.float32, np.float64),
                        help='Data type: float64 (default) or float32')
    parser.add_argument('--check-finiteness',
                        default=False,
                        action='store_true',
                        help='Check finiteness in sklearn input check '
                        '(disabled by default)')
    parser.add_argument('--output-format',
                        type=str,
                        default='json',
                        choices=('json',),
                        help='Output format: json')
    parser.add_argument('--time-method',
                        type=str,
                        default='box_filter',
                        choices=('box_filter',),
                        help='Method used for time measurements')
    parser.add_argument('--box-filter-measurements',
                        type=int,
                        default=100,
                        help='Maximum number of measurements in box filter')
    parser.add_argument('--inner-loops',
                        default=100,
                        type=int,
                        help='Maximum inner loop iterations '
                        '(we take the mean over inner iterations)')
    parser.add_argument('--outer-loops',
                        default=100,
                        type=int,
                        help='Maximum outer loop iterations '
                        '(we take the min over outer iterations)')
    parser.add_argument('--time-limit',
                        default=10.,
                        type=float,
                        help='Target time to spend on the benchmark')
    parser.add_argument('--goal-outer-loops',
                        default=10,
                        type=int,
                        dest='goal',
                        help='Number of outer loops to aim for '
                        'while automatically picking the number of '
                        'inner loops. If zero, do not automatically '
                        'decide the number of inner loops.')
    parser.add_argument('--seed',
                        type=int,
                        default=12345,
                        help='Seed to pass as random_state')
    parser.add_argument('--dataset-name',
                        type=str,
                        default=None,
                        help='Dataset name')
    parser.add_argument('--no-intel-optimized',
                        default=False,
                        action='store_true',
                        help='Use the stock (non-Intel-optimized) version. '
                        'Currently available for scikit-learn benchmarks')
    parser.add_argument('--device',
                        default='None',
                        type=str,
                        choices=('host', 'cpu', 'gpu', 'None'),
                        help='Execution context device')

    for data in ['X', 'y']:
        for stage in ['train', 'test']:
            parser.add_argument(f'--file-{data}-{stage}',
                                type=argparse.FileType('r'),
                                help=f'Input file with {data}_{stage}, '
                                'in NPY format')

    if size is not None:
        parser.add_argument('-s',
                            '--size',
                            default=size,
                            type=_parse_size,
                            dest='shape',
                            help='Problem size, delimited by "x" or ","')

    params = parser.parse_args()

    if not params.no_intel_optimized:
        try:
            from sklearnex import patch_sklearn
            patch_sklearn()
        except ImportError:
            logging.info('Failed to import sklearnex.patch_sklearn. '
                         'Using the stock version of scikit-learn')
            params.device = 'None'
    else:
        if params.device != 'None':
            logging.info(
                'Device context is not supported for stock scikit-learn. '
                'Drop --no-intel-optimized to use '
                f'--device={params.device}. Falling back to --device=None.')
            params.device = 'None'

    # disable finiteness check (default)
    if not params.check_finiteness:
        sklearn_disable_finiteness_check()

    # Ask DAAL what it thinks about this number of threads
    num_threads = prepare_daal_threads(num_threads=params.threads)
    if params.verbose:
        logging.info(f'@ DAAL gave us {num_threads} threads')

    n_jobs = None
    if n_jobs_supported:
        n_jobs = num_threads = params.threads

    # Set threading and DAAL related params here
    setattr(params, 'threads', num_threads)
    setattr(params, 'n_jobs', n_jobs)

    # Set size string parameter for easy printing
    if size is not None:
        setattr(params, 'size', size_str(params.shape))

    # Very verbose output
    if params.verbose:
        logging.info(f'@ params = {params.__dict__}')

    return params
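Typical use of the helper above: build a parser with any benchmark-specific options, then hand it to parse_args, which adds the common flags and parses sys.argv. A minimal sketch with a hypothetical --n-clusters option, assuming the surrounding benchmark module is importable:

import argparse

parser = argparse.ArgumentParser(description='KMeans benchmark')
parser.add_argument('--n-clusters', type=int, default=8)
params = parse_args(parser, size=(10000, 50))
print(params.shape, params.n_jobs, params.n_clusters)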
Example #10
from sklearnex import patch_sklearn

# Patch before importing any scikit-learn estimators so that the imports
# below pick up the accelerated implementations.
patch_sklearn()

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix
from datetime import datetime
import pandas as pd

time_start = datetime.now()

# Dataset
iris = pd.read_csv(
    'https://gist.githubusercontent.com/curran/a08a1080b88344b0c8a7/raw/0e7a9b0a5d22642a06d3d5b9bcbad9890c8ee534/iris.csv'
)
time_load = datetime.now()
print(f'Dataset loaded, runtime = {(time_load - time_start).seconds} seconds')

# Train/Test split
X = iris.drop('species', axis=1)
y = iris['species']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
time_split = datetime.now()
print(
    f'Train/test split, runtime = {(time_split - time_start).seconds} seconds')

# Hyperparameter tuning
model = DecisionTreeClassifier()
params = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
Example #11
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import sys
import time
import modin.pandas as pd

from sklearn import config_context
import sklearnex

sklearnex.patch_sklearn()
from sklearn.model_selection import train_test_split
import sklearn.linear_model as lm
import numpy as np


def read(filename):
    columns_names = [
        "YEAR0",
        "DATANUM",
        "SERIAL",
        "CBSERIAL",
        "HHWT",
        "CPI99",
        "GQ",
        "QGQ",

import subprocess
import sys

from sklearnex import patch_sklearn, unpatch_sklearn

# test unpatching from command line
err_code = subprocess.call(
    [sys.executable, "-m", "sklearnex.glob", "unpatch_sklearn"])
assert not err_code
unpatch_sklearn()
from sklearn.svm import SVC, SVR
assert not SVC.__module__.startswith('daal4py') and \
       not SVC.__module__.startswith('sklearnex')
assert not SVR.__module__.startswith('daal4py') and \
       not SVR.__module__.startswith('sklearnex')

# test patching from function
patch_sklearn(name=['svc'], global_patch=True)
from sklearn.svm import SVC, SVR
assert SVC.__module__.startswith('daal4py') or \
       SVC.__module__.startswith('sklearnex')
assert not SVR.__module__.startswith('daal4py') and \
       not SVR.__module__.startswith('sklearnex')

# test unpatching from function
unpatch_sklearn(global_unpatch=True)
from sklearn.svm import SVC, SVR
assert not SVC.__module__.startswith('daal4py') and \
       not SVC.__module__.startswith('sklearnex')
assert not SVR.__module__.startswith('daal4py') and \
       not SVR.__module__.startswith('sklearnex')
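To round out the test above, the same entry point also enables patching. A minimal sketch mirroring the subprocess call in the snippet; the check runs in a fresh interpreter because the global patch applies to new imports:

import subprocess
import sys

# Globally patch only SVC from the command line (see Example #3 for the parser).
err_code = subprocess.call(
    [sys.executable, "-m", "sklearnex.glob", "patch_sklearn", "-a", "svc"])
assert not err_code

# Verify in a child process.
check = ("from sklearn.svm import SVC; "
         "assert SVC.__module__.startswith(('daal4py', 'sklearnex'))")
assert not subprocess.call([sys.executable, "-c", check])

# Undo so later runs see stock scikit-learn.
assert not subprocess.call(
    [sys.executable, "-m", "sklearnex.glob", "unpatch_sklearn"])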