"""
Naive Estimator
***************

A naive estimator is a useful baseline against which to benchmark more
complex models. A naive estimator returns the mean of the outcome for
regression models and the plurality class for classification models.
Note that currently only binary classification is implemented; for binary
classifiers, the majority class is returned.
"""
from __future__ import absolute_import
import inspect
import logging

import lore
from lore.env import require
import lore.estimators
from lore.util import timed, before_after_callbacks

require(lore.dependencies.NUMPY)
import numpy


class Base(lore.estimators.Base):
    """Base class for the Naive estimator. Implements functionality common
    to all Naive models."""

    def __init__(self):
        super(Base, self).__init__()

    @before_after_callbacks
    @timed(logging.INFO)
    def fit(self, x, y, **kwargs):
        """
        Fit a naive model

        :param x: Predictors to use for fitting the data (not used in naive models)
        :param y: Outcome
        """
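# --- Usage sketch (illustrative, not part of the library) -----------------
# How the baseline described above might be benchmarked; assumes a concrete
# regression subclass of Base that memorizes mean(y), per the module
# docstring:
#
#   import numpy
#   x = numpy.zeros((4, 1))                 # predictors are ignored
#   y = numpy.array([1.0, 2.0, 3.0, 6.0])
#   naive = Base()
#   naive.fit(x, y)                         # would memorize mean(y) == 3.0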
from __future__ import unicode_literals, absolute_import
from abc import ABCMeta, abstractmethod
import csv
import datetime
import logging
import os
import re

import lore
from lore.util import timer
from lore.env import require

require(
    lore.dependencies.NUMPY +
    lore.dependencies.INFLECTION +
    lore.dependencies.PANDAS
)
from numpy import sin, cos, sqrt, arctan2, radians
import inflection
import numpy
import pandas

logger = logging.getLogger(__name__)


class Base(object):
    __metaclass__ = ABCMeta
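# --- Illustrative sketch (assumption, not part of the library) ------------
# The trigonometric imports above (sin, cos, sqrt, arctan2, radians) are the
# building blocks of a haversine great-circle distance, the kind of
# computation a geographic distance transformer performs. The helper name
# below is hypothetical:

def _haversine_km(lat1, lon1, lat2, lon2):
    """Great-circle distance in kilometers between two (lat, lon) points."""
    earth_radius_km = 6371.0
    lat1, lon1, lat2, lon2 = map(radians, (lat1, lon1, lat2, lon2))
    dlat, dlon = lat2 - lat1, lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    return earth_radius_km * 2 * arctan2(sqrt(a), sqrt(1 - a))

# e.g. _haversine_km(37.77, -122.42, 40.71, -74.01) is roughly 4130 km
# (San Francisco to New York).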
from __future__ import absolute_import
import json
import logging
import os.path
from os.path import join
import pickle
import re

import lore.ansi
import lore.estimators
from lore.env import require
from lore.util import timer, timed

require(
    lore.dependencies.TABULATE +
    lore.dependencies.SKLEARN +
    lore.dependencies.SHAP
)
import shap
from tabulate import tabulate
from sklearn.model_selection import RandomizedSearchCV

logger = logging.getLogger(__name__)

try:
    FileExistsError
except NameError:
    FileExistsError = OSError


class Base(object):
    def __init__(self, pipeline=None, estimator=None):
        self.name = self.__module__ + '.' + self.__class__.__name__
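# --- Illustrative sketch (assumption, not part of the library) ------------
# RandomizedSearchCV is imported above to support hyper parameter search.
# The standalone pattern it enables (the estimator, distributions, and
# x_train/y_train data below are hypothetical):
#
#   from sklearn.ensemble import RandomForestRegressor
#   search = RandomizedSearchCV(
#       RandomForestRegressor(),
#       param_distributions={'n_estimators': [10, 50, 100],
#                            'max_depth': [2, 4, 8]},
#       n_iter=5,
#       cv=3,
#   )
#   search.fit(x_train, y_train)
#   best = search.best_estimator_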
# -*- coding: utf-8 -*-
import datetime
from abc import ABCMeta, abstractmethod

import lore
from lore.env import require

require(lore.dependencies.PANDAS + lore.dependencies.INFLECTION)
import pandas
import inflection


class Base(object):
    __metaclass__ = ABCMeta

    def __init__(self):
        self._data = pandas.DataFrame()

    @abstractmethod
    def key(self):
        """
        :return: A composite or single key for the index
        """
        pass

    @abstractmethod
    def get_data(self):
        pass

    @abstractmethod
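# --- Illustrative sketch (assumption, not part of the library) ------------
# A hypothetical concrete subclass, showing the contract the abstract
# methods above define:
#
#   class UserFeature(Base):
#       def key(self):
#           return 'user_id'
#
#       def get_data(self):
#           # A real implementation would query a store or warehouse.
#           return pandas.DataFrame({'user_id': [1, 2], 'logins': [4, 7]})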
from __future__ import absolute_import
import atexit
import inspect
import logging
import warnings

import lore.io
from lore.callbacks import ReloadBest
from lore.encoders import Continuous, Pass
from lore.pipelines import Observations
from lore.util import timed, before_after_callbacks
from lore.env import require

require(
    lore.dependencies.KERAS +
    lore.dependencies.NUMPY +
    lore.dependencies.PANDAS +
    lore.dependencies.SKLEARN
)
import keras
import keras.backend
from keras.callbacks import EarlyStopping, TensorBoard, TerminateOnNaN
from keras.layers import (
    Input, Embedding, Dense, Reshape, Concatenate, Dropout,
    SimpleRNN, Flatten, LSTM, GRU, BatchNormalization
)
from keras.optimizers import Adam
import numpy
import pandas
from sklearn.base import BaseEstimator
import tensorflow
from tensorflow.python.client.timeline import Timeline
from tensorflow.python.client import device_lib

# Count the GPUs visible to TensorFlow once, at import time.
available_gpus = len([
    x.name for x in device_lib.list_local_devices()
    if x.device_type == 'GPU'
])
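# --- Illustrative sketch (assumption, not part of the library) ------------
# One way a caller might act on `available_gpus`; multi_gpu_model is the
# Keras 2.x data-parallelism utility:
#
#   model = ...  # a compiled keras.Model
#   if available_gpus > 1:
#       from keras.utils import multi_gpu_model
#       model = multi_gpu_model(model, gpus=available_gpus)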
def __init__(self, url, name='connection', watermark=True, **kwargs):
    if not sqlalchemy:
        raise lore.env.ModuleNotFoundError(
            'No module named sqlalchemy. Please add it to requirements.txt.')

    parsed = lore.env.parse_url(url)
    self.adapter = parsed.scheme
    if self.adapter == 'postgres':
        require(lore.dependencies.POSTGRES)
    if self.adapter == 'snowflake':
        require(lore.dependencies.SNOWFLAKE)
        if 'numpy' not in parsed.query:
            logger.error(
                'You should add a `?numpy=True` query param to your snowflake '
                'connection url to ensure proper compatibility')

    # SQLAlchemy expects native types for these engine options, but config
    # files deliver them as strings.
    for int_value in ['pool_size', 'pool_recycle', 'max_overflow']:
        if int_value in kwargs:
            kwargs[int_value] = int(kwargs[int_value])
    if 'poolclass' in kwargs:
        kwargs['poolclass'] = getattr(sqlalchemy.pool, kwargs['poolclass'])
    if '__name__' in kwargs:
        del kwargs['__name__']
    if 'echo' not in kwargs:
        kwargs['echo'] = False
    logger.info("Creating engine: %s %s" % (url, kwargs))
    self._engine = sqlalchemy.create_engine(url, **kwargs).execution_options(autocommit=True)
    self._metadata = None
    self.name = name
    self.url = url
    self._transactions = []
    self.__thread_local = threading.local()

    @event.listens_for(self._engine, "before_cursor_execute", retval=True)
    def comment_sql_calls(conn, cursor, statement, parameters, context, executemany):
        conn.info.setdefault('query_start_time', []).append(datetime.now())

        if watermark:
            # Walk up the stack to find the application frame that issued
            # the query, and prepend it as a SQL comment for traceability.
            stack = inspect.stack()[1:-1]
            if sys.version_info.major == 3:
                stack = [(x.filename, x.lineno, x.function) for x in stack]
            else:
                stack = [(x[1], x[2], x[3]) for x in stack]

            paths = [x[0] for x in stack]
            origin = next((x for x in paths if x.startswith(lore.env.ROOT)), None)
            if origin is None:
                origin = next((x for x in paths if 'sqlalchemy' not in x), None)
            if origin is None:
                origin = paths[0]
            caller = next(x for x in stack if x[0] == origin)
            statement = "/* %s | %s:%d in %s */\n" % (
                lore.env.APP, caller[0], caller[1], caller[2]) + statement
        return statement, parameters

    @event.listens_for(self._engine, "after_cursor_execute")
    def time_sql_calls(conn, cursor, statement, parameters, context, executemany):
        total = datetime.now() - conn.info['query_start_time'].pop(-1)
        logger.info("SQL: %s" % total)

    @event.listens_for(self._engine, "connect")
    def receive_connect(dbapi_connection, connection_record):
        if hasattr(dbapi_connection, 'get_dsn_parameters'):
            logger.info("connect: %s" % dbapi_connection.get_dsn_parameters())
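# --- Usage sketch (assumption, not part of the library) -------------------
# Constructing a connection directly; in practice lore builds these from
# config/database.cfg (the URL and kwargs below are hypothetical):
#
#   conn = Connection('postgres://user:pass@localhost:5432/app_development',
#                     name='main', pool_size='5')
#   # Every statement issued through the engine is now prefixed with a
#   # /* app | file:line in function */ watermark and timed in the logs.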
    # Each section of config/database.cfg becomes a module-level connection.
    options = config._sections[section]
    if options.get('url') == '$DATABASE_URL':
        logger.error(
            '$DATABASE_URL is not set, but is used in config/database.cfg. '
            'Skipping connection.'
        )
    else:
        vars()[section.lower()] = Connection(name=section.lower(), **options)

if 'metadata' not in vars():
    vars()['metadata'] = Connection('sqlite:///%s/metadata.sqlite' % lore.env.DATA_DIR)

redis_config = lore.env.REDIS_CONFIG
if redis_config:
    require(lore.dependencies.REDIS)
    import redis

    for section in redis_config.sections():
        vars()[section.lower()] = redis.StrictRedis(
            host=redis_config.get(section, 'url'),
            port=redis_config.get(section, 'port'))

s3 = None
bucket = None
if lore.env.AWS_CONFIG:
    require(lore.dependencies.S3)
    import boto3
    from botocore.exceptions import ClientError

    config = lore.env.AWS_CONFIG
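# --- Illustrative sketch (assumption, not part of the library) ------------
# The shape of config/database.cfg consumed by the loop above; each section
# becomes a module-level connection (e.g. lore.io.main):
#
#   [MAIN]
#   url: $DATABASE_URL
#   pool_size: 5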
from __future__ import unicode_literals
from abc import ABCMeta, abstractmethod
import os
import re
import logging
from datetime import timedelta

import lore
import lore.transformers
from lore.env import require
from lore.util import timer, get_relevant_args

require(
    lore.dependencies.INFLECTION +
    lore.dependencies.NUMPY +
    lore.dependencies.PANDAS +
    lore.dependencies.SMART_OPEN
)
import inflection
import numpy
import pandas
from smart_open import smart_open

logger = logging.getLogger(__name__)

TWIN = '_twin'


class Base(object):
    """
    Encoders reduce a data set to a more efficient representation suitable
    for learning. Encoders may be lossy, and should first be `fit`, after
    which they can `transform` data.
    """
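# --- Usage sketch (assumption, not part of the library) -------------------
# The fit-then-transform contract described above, with lore's Continuous
# encoder (the column name and data below are hypothetical):
#
#   encoder = Continuous('price')
#   encoder.fit(training_data)             # learn the representation once
#   encoded = encoder.transform(new_data)  # reuse it on any later data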
import os
import re
import sys
import tempfile
import threading
import uuid
from datetime import datetime

import lore
from lore.env import require
from lore.util import timer
from lore.stores import query_cached

require(
    lore.dependencies.PANDAS +
    lore.dependencies.SQL +
    lore.dependencies.JINJA
)
import pandas
import sqlalchemy
from sqlalchemy import event
from sqlalchemy.schema import DropTable
from sqlalchemy.ext.compiler import compiles
import jinja2

# SQL extracts live in app/extracts and are rendered through Jinja2.
jinja2_env = jinja2.Environment(
    loader=jinja2.FileSystemLoader(
        os.path.join(lore.env.ROOT, lore.env.APP, 'extracts')
    ),
)
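# --- Illustrative sketch (assumption, not part of the library) ------------
# A template the environment above would load from app/extracts, e.g.
# extracts/recent_orders.sql (the file name, table, and parameter are
# hypothetical):
#
#   SELECT id, user_id, total
#   FROM orders
#   WHERE created_at > {{ cutoff }}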
import os
from os.path import join, dirname
import logging

import botocore
import lore
from lore.util import timer
from lore.env import require

require(lore.dependencies.H5PY)
import h5py

try:
    FileExistsError
except NameError:
    FileExistsError = OSError

logger = logging.getLogger(__name__)


class Base(lore.models.base.Base):
    def __init__(self, pipeline, estimator):
        super(Base, self).__init__(pipeline, estimator)

    def weights_path(self):
        return join(self.fitting_path(), 'weights.h5')

    def checkpoint_path(self):
        return join(self.fitting_path(), 'checkpoints/{epoch}.h5')

    def tensorboard_path(self):
        # Follows the same fitting-directory layout as the other artifacts.
        return join(self.fitting_path(), 'tensorboard')
"""
scikit-learn Estimator
**********************

This estimator allows you to use any scikit-learn estimator of your choice.
Note that the underlying estimator can always be accessed as
``Base(estimator).sklearn``.
"""
from __future__ import absolute_import
import inspect
import logging
import warnings

import lore
import lore.estimators
from lore.env import require
from lore.util import timed, before_after_callbacks

require(lore.dependencies.SKLEARN)


class Base(lore.estimators.Base):
    def __init__(self, estimator):
        super(Base, self).__init__()
        self.sklearn = estimator

    @before_after_callbacks
    @timed(logging.INFO)
    def fit(self, x, y, validation_x=None, validation_y=None, **sklearn_kwargs):
        # Delegate fitting to the wrapped scikit-learn estimator;
        # validation_x/validation_y are accepted for interface compatibility.
        self.sklearn.fit(x, y, **sklearn_kwargs)
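# --- Usage sketch (assumption, not part of the library) -------------------
# Wrapping an arbitrary scikit-learn estimator; the underlying object stays
# reachable as `.sklearn` (x_train/y_train below are hypothetical):
#
#   from sklearn.ensemble import RandomForestRegressor
#   model = Base(RandomForestRegressor(n_estimators=10))
#   model.fit(x_train, y_train)
#   model.sklearn.feature_importances_   # direct access to the wrapped API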
import os
import gc
import pickle
from datetime import datetime

import lore
from lore.stores.base import Base
from lore.util import timer
from lore.env import require

require(lore.dependencies.PANDAS)
import pandas

try:
    FileExistsError
except NameError:
    FileExistsError = OSError


class Disk(Base):
    EXTENSION = '.pickle'

    def __init__(self, dir):
        self.dir = dir
        self.limit = None
        if not os.path.exists(self.dir):
            try:
                os.makedirs(self.dir)
            except FileExistsError:
                pass  # race to create
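# --- Usage sketch (assumption, not part of the library) -------------------
# A Disk store pickles cached values under its directory; the exact access
# interface comes from lore.stores.base.Base, so the calls below are
# assumptions:
#
#   store = Disk(os.path.join(lore.env.DATA_DIR, 'query_cache'))
#   store[key] = dataframe   # persisted as <dir>/<key>.pickle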
import inspect
import importlib
import json
import logging
import pkgutil

import lore
import lore.util
import lore.env
from lore.env import require
from lore.util import timer

require(lore.dependencies.PANDAS + lore.dependencies.FLASK)
import pandas
from flask import Flask, request

app = Flask(lore.env.APP)

logger = logging.getLogger(__name__)


@app.route('/')
def index():
    names = str([
        name for _, name, _ in pkgutil.iter_modules([lore.env.APP + '/' + 'models'])
    ])
    return 'Hello %s!' % lore.env.APP + '\n' + names


for module_finder, module_name, _ in pkgutil.iter_modules(
        [lore.env.APP + '/' + 'models']):
    # Import each model module under app/models so it can register endpoints.
    module = importlib.import_module(lore.env.APP + '.models.' + module_name)
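# --- Usage sketch (assumption, not part of the library) -------------------
# Exercising the index route above with Flask's built-in test client:
#
#   with app.test_client() as client:
#       response = client.get('/')
#       print(response.data)   # b'Hello <app>!\n[...model module names...]'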
def download(remote_url, local_path=None, cache=True, extract=False):
    _bucket = bucket
    if re.match(r'^https?://', remote_url):
        protocol = 'http'
    elif re.match(r'^s3://', remote_url):
        require(lore.dependencies.S3)
        import boto3
        from botocore.exceptions import ClientError
        protocol = 's3'
        url_parts = urlparse(remote_url)
        remote_url = url_parts.path[1:]
        _bucket = boto3.resource('s3').Bucket(url_parts.netloc)
    else:
        if s3 is None or bucket is None:
            raise NotImplementedError("Cannot download from s3 without config/aws.cfg")
        protocol = 's3'
        remote_url = prefix_remote_root(remote_url)

    if cache:
        if local_path is None:
            if protocol == 'http':
                filename = lore.env.parse_url(remote_url).path.split('/')[-1]
            elif protocol == 's3':
                filename = remote_url
            local_path = os.path.join(lore.env.DATA_DIR, filename)

        if os.path.exists(local_path):
            return local_path
    elif local_path:
        raise ValueError("You can't pass lore.io.download(local_path=X) unless you also pass cache=True")
    elif extract:
        raise ValueError("You can't pass lore.io.download(extract=True) unless you also pass cache=True")

    with timer('DOWNLOAD: %s' % remote_url):
        # Download to a temp file first, so a failed transfer never leaves a
        # partial file at local_path.
        temp_file, temp_path = tempfile.mkstemp(dir=lore.env.WORK_DIR)
        os.close(temp_file)
        try:
            if protocol == 'http':
                lore.env.retrieve_url(remote_url, temp_path)
            else:
                _bucket.download_file(remote_url, temp_path)
        except ClientError as e:
            logger.error("Error downloading file: %s" % e)
            raise

    if cache:
        dir = os.path.dirname(local_path)
        if not os.path.exists(dir):
            try:
                os.makedirs(dir)
            except FileExistsError:
                pass  # race to create
        shutil.copy(temp_path, local_path)
        os.remove(temp_path)

        if extract:
            with timer('EXTRACT: %s' % local_path, logging.WARNING):
                if local_path.endswith('.tar.gz'):
                    with tarfile.open(local_path, 'r:gz') as tar:
                        tar.extractall(os.path.dirname(local_path))
                elif local_path.endswith('.zip'):
                    import zipfile
                    with zipfile.ZipFile(local_path, 'r') as zip:
                        zip.extractall(os.path.dirname(local_path))
    else:
        local_path = temp_path

    return local_path
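# --- Usage sketch (assumption, not part of the library) -------------------
# Fetching, caching, and extracting a public archive (the URL is
# hypothetical):
#
#   path = download('https://example.com/datasets/reviews.tar.gz',
#                   cache=True, extract=True)
#   # path points at the cached archive under lore.env.DATA_DIR, with its
#   # contents extracted alongside it.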