Esempio n. 1
0
Naive Estimator
****************
A naive estimator is a useful baseline against which to benchmark more complex models.
A naive estimator will return the mean of the outcome for regression models and
the plurality class for classification models. Note that currently only binary classification
is implemented. For binary classifiers, the majority class will be returned
"""
from __future__ import absolute_import
import inspect
import logging
import lore
from lore.env import require
import lore.estimators
from lore.util import timed, before_after_callbacks

require(lore.dependencies.NUMPY)
import numpy


class Base(lore.estimators.Base):
    """Base class for the Naive estimator. Implements functionality common to all Naive models"""
    def __init__(self):
        """Initialize the naive estimator; it takes no hyperparameters."""
        super(Base, self).__init__()

    @before_after_callbacks
    @timed(logging.INFO)
    def fit(self, x, y, **kwargs):
        """
        Fit a naive model
        :param x: Predictors to use for fitting the data (this will not be used in naive models)
        :param y: Outcome
Esempio n. 2
0
from __future__ import unicode_literals, absolute_import
from abc import ABCMeta, abstractmethod

import csv
import datetime
import logging
import os
import re

import lore
from lore.util import timer
from lore.env import require

require(
    lore.dependencies.NUMPY +
    lore.dependencies.INFLECTION +
    lore.dependencies.PANDAS
)

from numpy import sin, cos, sqrt, arctan2, radians

import inflection
import numpy
import pandas


logger = logging.getLogger(__name__)


class Base(object):
    __metaclass__ = ABCMeta
Esempio n. 3
0
from __future__ import absolute_import

import json
import logging
import os.path
from os.path import join
import pickle
import re

import lore.ansi
import lore.estimators
from lore.env import require
from lore.util import timer, timed

require(lore.dependencies.TABULATE + lore.dependencies.SKLEARN +
        lore.dependencies.SHAP)
import shap
from tabulate import tabulate
from sklearn.model_selection import RandomizedSearchCV

logger = logging.getLogger(__name__)

# Python 2 has no FileExistsError builtin; alias it to its Python 3 parent,
# OSError, so `except FileExistsError` works on both interpreter versions.
try:
    FileExistsError
except NameError:
    FileExistsError = OSError


class Base(object):
    def __init__(self, pipeline=None, estimator=None):
        self.name = self.__module__ + '.' + self.__class__.__name__
Esempio n. 4
0
# -*- coding: utf-8 -*-
import datetime
from abc import ABCMeta, abstractmethod

import lore
from lore.env import require

require(lore.dependencies.PANDAS + lore.dependencies.INFLECTION)

import pandas
import inflection


class Base(object):
    __metaclass__ = ABCMeta

    def __init__(self):
        # Backing store for this source; starts empty — presumably filled by
        # get_data() implementations in subclasses (confirm against callers).
        self._data = pandas.DataFrame()

    @abstractmethod
    def key(self):
        """
        Return the key used to index this data set.

        :return: Composite or a single key for index
        """
        pass

    @abstractmethod
    def get_data(self):
        """Return the data for this source; must be implemented by subclasses."""
        pass

    @abstractmethod
Esempio n. 5
0
from __future__ import absolute_import
import atexit
import inspect
import logging
import warnings

import lore.io
from lore.callbacks import ReloadBest
from lore.encoders import Continuous, Pass
from lore.pipelines import Observations
from lore.util import timed, before_after_callbacks
from lore.env import require

require(lore.dependencies.KERAS + lore.dependencies.NUMPY +
        lore.dependencies.PANDAS + lore.dependencies.SKLEARN)

import keras
import keras.backend
from keras.callbacks import EarlyStopping, TensorBoard, TerminateOnNaN
from keras.layers import Input, Embedding, Dense, Reshape, Concatenate, Dropout, SimpleRNN, Flatten, LSTM, GRU, BatchNormalization
from keras.optimizers import Adam
import numpy
import pandas
from sklearn.base import BaseEstimator
import tensorflow
from tensorflow.python.client.timeline import Timeline
from tensorflow.python.client import device_lib

# Count the local devices TensorFlow reports as GPUs.
available_gpus = sum(
    1 for device in device_lib.list_local_devices()
    if device.device_type == 'GPU'
)
Esempio n. 6
0
    def __init__(self, url, name='connection', watermark=True, **kwargs):
        """
        Wrap a sqlalchemy engine for the given database ``url``.

        :param url: sqlalchemy-style connection url
        :param name: logical name for this connection
        :param watermark: when True, prepend a comment to every SQL statement
            identifying the app and the calling code location
        :param kwargs: forwarded to ``sqlalchemy.create_engine`` after the
            light type coercion below
        :raises lore.env.ModuleNotFoundError: if sqlalchemy is not installed
        """
        if not sqlalchemy:
            raise lore.env.ModuleNotFoundError('No module named sqlalchemy. Please add it to requirements.txt.')

        parsed = lore.env.parse_url(url)
        self.adapter = parsed.scheme

        # Pull in adapter-specific driver dependencies on demand.
        if self.adapter == 'postgres':
            require(lore.dependencies.POSTGRES)
        if self.adapter == 'snowflake':
            require(lore.dependencies.SNOWFLAKE)
            if 'numpy' not in parsed.query:
                logger.error('You should add `?numpy=True` query param to your snowflake connection url to ensure proper compatibility')

        # Engine options typically arrive as strings (e.g. from a config
        # file); coerce the integer-valued ones and resolve the pool class
        # from its name on sqlalchemy.pool.
        for int_value in ['pool_size', 'pool_recycle', 'max_overflow']:
            if int_value in kwargs:
                kwargs[int_value] = int(kwargs[int_value])
        if 'poolclass' in kwargs:
            kwargs['poolclass'] = getattr(sqlalchemy.pool, kwargs['poolclass'])
        if '__name__' in kwargs:
            del kwargs['__name__']
        if 'echo' not in kwargs:
            kwargs['echo'] = False
        logger.info("Creating engine: %s %s" % (url, kwargs))
        self._engine = sqlalchemy.create_engine(url, **kwargs).execution_options(autocommit=True)
        self._metadata = None
        self.name = name
        self.url = url
        self._transactions = []
        self.__thread_local = threading.local()

        @event.listens_for(self._engine, "before_cursor_execute", retval=True)
        def comment_sql_calls(conn, cursor, statement, parameters, context, executemany):
            # Push the start time for time_sql_calls below, then (optionally)
            # prefix the statement with a /* app | file:line in func */ comment.
            conn.info.setdefault('query_start_time', []).append(datetime.now())
            if watermark:
                stack = inspect.stack()[1:-1]
                # inspect.stack() yields FrameInfo objects on Python 3 and
                # plain tuples on Python 2; normalize both to
                # (filename, lineno, function).
                if sys.version_info.major == 3:
                    stack = [(x.filename, x.lineno, x.function) for x in stack]
                else:
                    stack = [(x[1], x[2], x[3]) for x in stack]

                # Prefer a frame inside the app (lore.env.ROOT), then the
                # first frame outside sqlalchemy, then the nearest frame.
                paths = [x[0] for x in stack]
                origin = next((x for x in paths if x.startswith(lore.env.ROOT)), None)
                if origin is None:
                    origin = next((x for x in paths if 'sqlalchemy' not in x), None)
                if origin is None:
                    origin = paths[0]
                caller = next(x for x in stack if x[0] == origin)

                statement = "/* %s | %s:%d in %s */\n" % (lore.env.APP, caller[0], caller[1], caller[2]) + statement
            return statement, parameters

        @event.listens_for(self._engine, "after_cursor_execute")
        def time_sql_calls(conn, cursor, statement, parameters, context, executemany):
            # Pop the matching start time pushed by comment_sql_calls and log
            # the elapsed wall-clock time of the statement.
            total = datetime.now() - conn.info['query_start_time'].pop(-1)
            logger.info("SQL: %s" % total)

        @event.listens_for(self._engine, "connect")
        def receive_connect(dbapi_connection, connection_record):
            # Some DBAPI connections (e.g. psycopg2) expose
            # get_dsn_parameters; log them when available.
            if hasattr(dbapi_connection, 'get_dsn_parameters'):
                logger.info("connect: %s" % dbapi_connection.get_dsn_parameters())
Esempio n. 7
0
        options = config._sections[section]
        if options.get('url') == '$DATABASE_URL':
            logger.error(
                '$DATABASE_URL is not set, but is used in config/database.cfg. Skipping connection.'
            )
        else:
            vars()[section.lower()] = Connection(name=section.lower(),
                                                 **options)

# Fall back to a local sqlite database for the 'metadata' connection when the
# config above did not define one. vars() assignments create module-level
# names, so each connection is importable from this module.
if 'metadata' not in vars():
    vars()['metadata'] = Connection('sqlite:///%s/metadata.sqlite' %
                                    lore.env.DATA_DIR)

# Expose one module-level redis client per configured section, named after
# the lowercased section name.
redis_config = lore.env.REDIS_CONFIG
if redis_config:
    require(lore.dependencies.REDIS)
    import redis

    for section in redis_config.sections():
        vars()[section.lower()] = redis.StrictRedis(
            host=redis_config.get(section, 'url'),
            port=redis_config.get(section, 'port'))

# s3 client and default bucket remain None unless AWS is configured.
s3 = None
bucket = None
if lore.env.AWS_CONFIG:
    require(lore.dependencies.S3)
    import boto3
    from botocore.exceptions import ClientError

    config = lore.env.AWS_CONFIG
Esempio n. 8
0
from __future__ import unicode_literals
from abc import ABCMeta, abstractmethod
import os
import re
import logging
from datetime import timedelta

import lore
import lore.transformers
from lore.env import require
from lore.util import timer

require(lore.dependencies.INFLECTION + lore.dependencies.NUMPY +
        lore.dependencies.PANDAS + lore.dependencies.SMART_OPEN)

import inflection
import numpy
import pandas
from smart_open import smart_open
import lore
import lore.transformers
from lore.util import timer, get_relevant_args

logger = logging.getLogger(__name__)
TWIN = '_twin'


class Base(object):
    """
    Encoders reduces a data set to a more efficient representation suitable
    for learning. Encoders may be lossy, and should first be `fit` after
Esempio n. 9
0
import re
import sys
import tempfile
import threading
import uuid

from datetime import datetime

import lore
from lore.env import require
from lore.util import timer
from lore.stores import query_cached

require(
    lore.dependencies.PANDAS +
    lore.dependencies.SQL +
    lore.dependencies.JINJA
)

import pandas

import sqlalchemy
from sqlalchemy import event
from sqlalchemy.schema import DropTable
from sqlalchemy.ext.compiler import compiles

import jinja2
jinja2_env = jinja2.Environment(
    loader=jinja2.FileSystemLoader(
        os.path.join(lore.env.ROOT, lore.env.APP, 'extracts')
    ),
Esempio n. 10
0
import os
from os.path import join, dirname
import logging
import botocore

import lore
from lore.util import timer
from lore.env import require

require(lore.dependencies.H5PY)
import h5py

# Python 2 has no FileExistsError builtin; alias it to its Python 3 parent,
# OSError, so `except FileExistsError` works on both interpreter versions.
try:
    FileExistsError
except NameError:
    FileExistsError = OSError

logger = logging.getLogger(__name__)


class Base(lore.models.base.Base):
    def __init__(self, pipeline, estimator):
        """Delegate initialization to lore.models.base.Base unchanged."""
        super(Base, self).__init__(pipeline, estimator)

    def weights_path(self):
        """Location of the saved model weights inside the fitting directory."""
        fitting_dir = self.fitting_path()
        return join(fitting_dir, 'weights.h5')

    def checkpoint_path(self):
        """Checkpoint file pattern ('{epoch}' placeholder) in the fitting directory."""
        fitting_dir = self.fitting_path()
        return join(fitting_dir, 'checkpoints/{epoch}.h5')

    def tensorboard_path(self):
Esempio n. 11
0
scikit-learn Estimator
****************
This estimator allows you to use any scikit-learn estimator of your choice.
Note that the underlying estimator can always be accessed as ``Base(estimator).sklearn``
"""
from __future__ import absolute_import
import inspect
import logging
import warnings

import lore
import lore.estimators
from lore.env import require
from lore.util import timed, before_after_callbacks

require(lore.dependencies.SKLEARN)


class Base(lore.estimators.Base):
    def __init__(self, estimator):
        """
        :param estimator: any scikit-learn estimator instance; it remains
            accessible to callers as ``self.sklearn``
        """
        super(Base, self).__init__()
        self.sklearn = estimator

    @before_after_callbacks
    @timed(logging.INFO)
    def fit(self,
            x,
            y,
            validation_x=None,
            validation_y=None,
            **sklearn_kwargs):
Esempio n. 12
0
import os
import gc
import pickle

import lore
from lore.stores.base import Base
from lore.util import timer
from lore.env import require

require(lore.dependencies.PANDAS)

import pandas
from datetime import datetime

# Python 2 has no FileExistsError builtin; alias it to its Python 3 parent,
# OSError, so `except FileExistsError` works on both interpreter versions.
try:
    FileExistsError
except NameError:
    FileExistsError = OSError


class Disk(Base):
    EXTENSION = '.pickle'

    def __init__(self, dir):
        """
        :param dir: directory used to store pickled entries; created on
            demand if it does not exist yet
        """
        self.dir = dir
        # No cap on stored entries by default.
        self.limit = None
        if not os.path.exists(self.dir):
            try:
                os.makedirs(self.dir)
            # Dropped the unused `as ex` binding; the exception is
            # intentionally swallowed when another process wins the race.
            except FileExistsError:
                pass  # race to create
Esempio n. 13
0
import inspect
import importlib
import json
import logging
import pkgutil

import lore
import lore.util
import lore.env
from lore.env import require
from lore.util import timer

require(lore.dependencies.PANDAS + lore.dependencies.FLASK)
import pandas
from flask import Flask, request

app = Flask(lore.env.APP)

logger = logging.getLogger(__name__)


@app.route('/')
def index():
    """Greet with the app name and list the model modules it contains."""
    model_modules = pkgutil.iter_modules([lore.env.APP + '/' + 'models'])
    names = str([module_name for _, module_name, _ in model_modules])
    return 'Hello %s!' % lore.env.APP + '\n' + names


for module_finder, module_name, _ in pkgutil.iter_modules(
Esempio n. 14
0
def download(remote_url, local_path=None, cache=True, extract=False):
    """
    Fetch a remote file over http(s) or s3, optionally caching and extracting it.

    :param remote_url: http(s) url, s3:// url, or an s3 key relative to the
        configured bucket
    :param local_path: where to cache the download (only valid with
        cache=True); defaults to a path under lore.env.DATA_DIR derived from
        the url
    :param cache: when True, keep/reuse a local copy; when False, download to
        a temp file and return its path
    :param extract: when True (requires cache=True), unpack .tar.gz/.zip
        archives next to the downloaded file
    :return: path to the local file
    :raises NotImplementedError: for bucket-relative keys with no aws config
    :raises ValueError: for invalid cache/local_path/extract combinations
    """
    _bucket = bucket
    if re.match(r'^https?://', remote_url):
        protocol = 'http'
    elif re.match(r'^s3?://', remote_url):
        # NOTE(review): this pattern also matches 's://' because the '3' is
        # optional; presumably r'^s3://' was intended — confirm before
        # tightening, as it would change accepted urls.
        require(lore.dependencies.S3)
        import boto3
        from botocore.exceptions import ClientError
        protocol = 's3'
        url_parts = urlparse(remote_url)
        remote_url = url_parts.path[1:]
        _bucket = boto3.resource('s3').Bucket(url_parts.netloc)
    else:
        # Bare keys are resolved against the bucket from config/aws.cfg.
        if s3 is None or bucket is None:
            raise NotImplementedError(
                "Cannot download from s3 without config/aws.cfg")
        protocol = 's3'
        remote_url = prefix_remote_root(remote_url)
    if cache:
        if local_path is None:
            if protocol == 'http':
                filename = lore.env.parse_url(remote_url).path.split('/')[-1]
            elif protocol == 's3':
                filename = remote_url
            local_path = os.path.join(lore.env.DATA_DIR, filename)

        # Cache hit: reuse the existing file without touching the network.
        if os.path.exists(local_path):
            return local_path
    elif local_path:
        raise ValueError(
            "You can't pass lore.io.download(local_path=X), unless you also pass cache=True"
        )
    elif extract:
        raise ValueError(
            "You can't pass lore.io.download(extract=True), unless you also pass cache=True"
        )

    with timer('DOWNLOAD: %s' % remote_url):
        # Download into a temp file first so a partial download never lands
        # at local_path.
        temp_file, temp_path = tempfile.mkstemp(dir=lore.env.WORK_DIR)
        os.close(temp_file)
        try:
            if protocol == 'http':
                lore.env.retrieve_url(remote_url, temp_path)
            else:
                _bucket.download_file(remote_url, temp_path)
        except ClientError as e:
            logger.error("Error downloading file: %s" % e)
            raise

    if cache:
        dir = os.path.dirname(local_path)
        if not os.path.exists(dir):
            try:
                os.makedirs(dir)
            # BUGFIX: was `except os.FileExistsError:` — the os module has no
            # such attribute, so a concurrent mkdir raised AttributeError
            # instead of being ignored. FileExistsError is the builtin
            # (aliased to OSError on Python 2 elsewhere in this codebase).
            except FileExistsError:
                pass  # race to create

        shutil.copy(temp_path, local_path)
        os.remove(temp_path)

        if extract:
            with timer('EXTRACT: %s' % local_path, logging.WARNING):
                if local_path[-7:] == '.tar.gz':
                    with tarfile.open(local_path, 'r:gz') as tar:
                        tar.extractall(os.path.dirname(local_path))
                elif local_path[-4:] == '.zip':
                    import zipfile
                    with zipfile.ZipFile(local_path, 'r') as zip:
                        zip.extractall(os.path.dirname(local_path))

    else:
        # Uncached: hand back the temp file itself.
        local_path = temp_path
    return local_path