Example #1
0
class Modules(Enum):
    """
    Dotted paths of the dataset modules of sklearn, keras and torchvision.

    A member is only defined when ``is_package_available`` reports the
    corresponding package, so the set of members depends on the environment
    in which this class body is executed.
    """
    # scikit-learn dataset loaders
    if is_package_available('sklearn'):
        sklearn = "sklearn.datasets"
    # keras bundled datasets
    if is_package_available('keras'):
        keras = "keras.datasets"
    # NOTE: the member is named `torch`, but it is guarded by and points at torchvision.
    if is_package_available('torchvision'):
        torch = "torchvision.datasets"
Example #2
0
def check_determinism():
    """
    Warn about and tag known sources of non-determinism in TensorFlow.

    Only TensorFlow versions matching 1.14, 1.15 or 2.0 are inspected. Two
    situations are reported through ``logger.warning`` and recorded as a
    "non-determinism" tag on the current pads:

    * ``TF_USE_CUDNN_AUTOTUNE`` is present in the environment.
    * ``TF_CUDNN_DETERMINISTIC`` is not switched on (i.e. is not "1" or
      "true") and the ``tfdeterminism`` package is not available.

    :return: None
    """
    # Local import keeps this block self-contained.
    import re

    from pypads.app.pypads import get_current_pads
    pads = get_current_pads()

    if not is_package_available('tensorflow'):
        return

    import tensorflow
    tf_version = tensorflow.version.VERSION
    # The version check is a regular-expression match on the version string
    # (the tensorflow module itself has no `match` function).
    if not re.match(r"(1\.(14|15)|2\.0)", tf_version):
        return

    if "TF_USE_CUDNN_AUTOTUNE" in os.environ:
        logger.warning(
            "When using TF auto-tuning of cuDNN convolution algorithms your experiment might"
            " be non-deterministic.")
        pads.api.set_tag("non-determinism", "CUDNN_AUTOTUNE")

    # Environment values are always strings, so compare against the textual
    # "on" values instead of the integer 1. Unset, empty, "0" and "false"
    # all count as "not deterministic".
    cudnn_deterministic = os.environ.get("TF_CUDNN_DETERMINISTIC", "").strip().lower()
    if cudnn_deterministic not in ("1", "true"):
        if not is_package_available("tfdeterminism"):
            logger.warning(
                "Your experiment might include a gpu-specific sources of non-determinism."
                " See https://github.com/NVIDIA/tensorflow-determinism"
            )
            pads.api.set_tag(
                "non-determinism",
                "TF auto-tuning of cuDNN convolution algorithms (see multi-algo note)"
            )
Example #3
0
class Types(Enum):
    """
    Known data container types.

    For each supported library the member value is the actual type object
    when the library can be imported here, otherwise a dotted-path string
    naming that type.

    NOTE(review): the ``from ... import`` statements execute inside the class
    body, so the imported names are bound in the class namespace as well --
    presumably Enum treats them as additional members/aliases; verify.
    """
    # sklearn's Bunch is only imported when tracking is active as well.
    if is_package_available('sklearn') and tracking_active:
        from sklearn.utils import Bunch
        bunch = Bunch
    else:
        bunch = "sklearn.utils.Bunch"
    # NOTE(review): this branch binds `Ndarray` while the fallback binds `ndarray`
    # -- confirm which spelling callers rely on.
    if is_package_available('numpy'):
        from numpy import ndarray
        Ndarray = ndarray
    else:
        ndarray = 'numpy.ndarray'
    if is_package_available('pandas'):
        from pandas import DataFrame, Series
        dataframe = DataFrame
        series = Series
    else:
        dataframe = 'pandas.DataFrame'
        series = 'pandas.Series'
    if is_package_available('networkx'):
        from networkx import Graph
        graph = Graph
    else:
        graph = 'networkx.Graph'
    # The right-hand side is looked up outside the class body (normally the builtin dict).
    dict = dict
    # `Tuple` is not defined in this block; presumably typing.Tuple -- confirm.
    tuple = Tuple
Example #4
0
def ner_tagging(corpus):
    """
    Extract noun chunks and named entities from a text using spacy.

    :param corpus: text handed to the spacy "en_core_web_sm" pipeline
    :return: tuple ``(str(nouns), str(entities))`` of stringified sets when
        spacy is available, otherwise None
    """
    if not is_package_available("spacy"):
        if is_package_available("nltk"):
            # TODO use nltk to find named entities https://towardsdatascience.com/named-entity-recognition-with-nltk-and-spacy-8c4a7d88e7da
            pass
        return None

    import spacy
    pipeline = spacy.load("en_core_web_sm")
    parsed = pipeline(corpus)

    # Noun chunks that contain neither "=" nor ".".
    noun_texts = {
        piece.text
        for piece in parsed.noun_chunks
        if "=" not in piece.text and "." not in piece.text
    }

    # Entities additionally exclude backticks and slashes.
    entity_texts = {
        entity.text
        for entity in parsed.ents
        if "=" not in entity.text and "." not in entity.text
        and "`" not in entity.text and "/" not in entity.text
    }

    return str(noun_texts), str(entity_texts)
Example #5
0
    def _call(self,
              *args,
              _pypads_env: LoggerEnv,
              _pypads_autologgers=None,
              _logger_call,
              _logger_output,
              **kwargs):
        """
        Switch on the mlflow autologgers of the libraries in use.

        An autologger is enabled at most once (tracked in ``added_auto_logs``)
        and only if it was requested, its name is present in ``sys.modules``
        and ``is_package_available`` reports the backing package.

        :param _pypads_autologgers: names of the autologgers to consider;
            defaults to all names handled below
        :return: None
        """
        requested = _pypads_autologgers
        if requested is None:
            requested = [
                "keras", "tensorflow", "xgboost", "gluon", "spark", "lightgbm",
                "sklearn"
            ]

        def pending(name, package):
            # Checks are evaluated left to right and stop at the first failure.
            return (name in requested
                    and name in sys.modules
                    and name not in added_auto_logs
                    and is_package_available(package))

        if pending('tensorflow', 'tensorflow'):
            added_auto_logs.add('tensorflow')
            from mlflow import tensorflow as mlflow_tensorflow
            mlflow_tensorflow.autolog()

        if pending('keras', 'keras'):
            added_auto_logs.add('keras')
            from mlflow import keras as mlflow_keras
            mlflow_keras.autolog()

        if pending('xgboost', 'xgboost'):
            added_auto_logs.add('xgboost')
            from mlflow import xgboost as mlflow_xgboost
            mlflow_xgboost.autolog()

        # NOTE(review): 'gluon' is looked up as a top-level module/package name
        # -- confirm this can ever be true for gluon users.
        if pending('gluon', 'gluon'):
            added_auto_logs.add('gluon')
            from mlflow import gluon as mlflow_gluon
            mlflow_gluon.autolog()

        # NOTE(review): availability is checked for 'pyspark' but sys.modules is
        # checked for 'spark' -- verify that key can ever be present.
        if pending('spark', 'pyspark'):
            added_auto_logs.add('spark')
            from mlflow import spark as mlflow_spark
            mlflow_spark.autolog()

        if pending('lightgbm', 'lightgbm'):
            added_auto_logs.add('lightgbm')
            from mlflow import lightgbm as mlflow_lightgbm
            mlflow_lightgbm.autolog()

        if pending('sklearn', 'sklearn'):
            added_auto_logs.add('sklearn')
            from mlflow import sklearn as mlflow_sklearn
            mlflow_sklearn.autolog()
Example #6
0
    def finalize_output(pads, logger_call, output, *args, **kwargs):
        """
        Store the cached pipeline and, if possible, a rendering of its graph.

        The pipeline is read from the run cache under the key "pipeline". When
        the drawing packages are reported as available, its networkx graph is
        drawn to "pipeline_graph.png" in the temp folder and stored as an
        artifact. The result of ``pipeline.store()`` is put on ``output.pipeline``.

        :param pads: object providing ``cache.run_get``
        :param logger_call: not used in this function
        :param output: object receiving the ``pipeline`` attribute
        """
        pipeline: PipelineTO = pads.cache.run_get("pipeline")

        from networkx import MultiDiGraph
        network: MultiDiGraph = pipeline.nx_network

        target_dir = get_temp_folder()
        image_path = os.path.join(target_dir, "pipeline_graph.png")
        if not os.path.exists(target_dir):
            pathlib.Path(target_dir).mkdir(parents=True, exist_ok=True)

        # NOTE(review): "agraph" is checked as a package name -- confirm such a
        # package exists, otherwise the drawing branch never runs.
        drawing_packages = ("agraph", "graphviz", "pygraphviz")
        if all(is_package_available(name) for name in drawing_packages):
            from networkx.drawing.nx_agraph import to_agraph
            rendered = to_agraph(network)
            rendered.layout('dot')
            rendered.draw(image_path)
            pipeline.store_artifact(
                image_path,
                "pipeline_graph.png",
                description="A depiction of the underlying pipeline of the experiment.")

        output.pipeline = pipeline.store()
Example #7
0
 def is_installed(self):
     """
     Tell whether an installed package satisfies this entry.

     For regex entries every version found for the name pattern is tested;
     otherwise the single package called ``self.name`` is looked up.

     :return: True if an allowed version is installed, else False
     :raises VersionNotFoundException: if the package is available but no
         version could be determined for it
     """
     if self.regex:
         # Every candidate is evaluated before the results are reduced.
         verdicts = {
             self.allows(candidate)
             for candidate in find_package_regex_versions(self.name).values()
             if candidate is not None
         }
         return any(verdicts)

     if not is_package_available(self.name):
         return False

     installed = find_package_version(self.name)
     if installed is None:
         raise VersionNotFoundException(
             "Couldn't find version for lib {}".format(self.name))
     return self.allows(installed)
Example #8
0
def set_random_seed(seed):
    """
    Seed the random number generators of python, numpy and (if present) torch.

    The seed is also kept in the module-level ``padre_seed``.

    :param seed: value passed on to every seeding function
    """
    global padre_seed
    import random

    padre_seed = seed

    # Python's own generator first, then numpy's global one.
    random.seed(seed)
    # Original note: global seeds for numpy seem to not work with RandomState()
    numpy.random.seed(seed)

    # torch is optional; nothing more to do without it.
    if not is_package_available("torch"):
        return

    # noinspection PyPackageRequirements,PyUnresolvedReferences
    import torch
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
Example #9
0
import os
import time
from functools import wraps
from typing import List

from pypads.utils.util import is_package_available

if is_package_available("joblib"):
    import joblib

    original_delayed = joblib.delayed

    @wraps(original_delayed)
    def punched_delayed(fn):
        """Decorator used to capture the arguments of a function."""
        @wraps(fn)
        def wrapped_function(*args,
                             _pypads_cache=None,
                             _pypads_config=None,
                             _pypads_active_run_id=None,
                             _pypads_tracking_uri=None,
                             _pypads_affected_modules=None,
                             _pypads_triggering_process=None,
                             **kwargs):
            from pypads.parallel.util import _pickle_tuple, _cloudpickle_tuple
            from pypads import logger

            # only if pads data was passed
            if _pypads_active_run_id:
                # noinspection PyUnresolvedReferences
                from pypads.app import pypads
Example #10
0

# --- TorchVision Dataset object ---
def torch_crawler(obj: Crawler, **kwargs):
    """
    Collect data, metadata and targets from a torchvision dataset object.

    :param obj: crawler whose ``data`` attribute holds the dataset
    :param kwargs: accepted but not merged into the metadata
    :return: tuple ``(data, metadata, targets)`` with data and targets
        converted through ``.numpy()``
    """
    logger.info("Detecting a torchvision dataset loaded object. Crawling any available metadata...")
    dataset = obj.data
    data = dataset.data.numpy()
    targets = dataset.targets.numpy()

    # The source file depends on whether the training split was loaded.
    train = dataset.train
    if train:
        source = dataset.training_file
    else:
        source = dataset.test_file

    metadata = {
        "format": obj.format,
        "shape": data.shape,
        "classes": dataset.classes,
        "Description": dataset.__repr__(),
        "training_data": train,
        "source": source,
    }
    return data, metadata, targets


# Register the torchvision crawler only when torchvision is reported as available.
# Presumably Modules.torch exists only in that case as well -- confirm against the Modules enum.
if is_package_available("torchvision"):
    Crawler.register_fn(Modules.torch.value, torch_crawler)


# --- Keras datasets ---
def keras_crawler(obj: Crawler, **kwargs):
    """
    Collect data, metadata and targets from a loaded keras dataset.

    ``obj.data`` is unpacked as ``(X_train, y_train), (X_test, y_test)``. Train
    and test parts are concatenated and the targets are appended to the
    features as an extra column.

    :param obj: crawler holding the dataset in ``data``
    :param kwargs: extra entries merged into the metadata (they win on key clashes)
    :return: tuple ``(data, metadata, targets)``
    """
    logger.info("Detecting a keras dataset loaded object. Crawling any available metadata...")
    train_split, test_split = obj.data
    features_train, labels_train = train_split
    features_test, labels_test = test_split

    import numpy as np
    targets = np.concatenate([labels_train, labels_test])
    features = np.concatenate([features_train, features_test])
    # Targets become one trailing column next to the features.
    target_column = targets.reshape(len(targets), 1)
    data = np.concatenate([features, target_column], axis=1)

    metadata = {"format": obj.format, "shape": data.shape, **kwargs}
    return data, metadata, targets

Example #11
0
def numpy_seed(seed):
    """
    Record the seed with the current pads, then seed numpy.

    Tracking is best effort: if it fails for any reason a warning is issued
    and seeding still takes place. Errors raised by the underlying seed
    function itself are not swallowed.

    :param seed: value forwarded to the original ``numpy.random.seed``
    :return: whatever the original seed function returns
    """
    import warnings

    try:
        from pypads.app.pypads import get_current_pads
        pads = get_current_pads()
        pads.cache.run_add("numpy.random.seed", seed)
        log_random_seed("numpy.random.seed")
    except Exception as e:
        # Merely constructing Warning(...) reports nothing; emit it for real.
        warnings.warn("Tracker failed to log the set seed because %s" % str(e))
    # Called exactly once and outside the try, so a seeding error is not
    # mistaken for a tracker failure.
    return original_numpy(seed)


# Monkey-patch: calls to numpy.random.seed now go through the tracking wrapper numpy_seed.
numpy.random.seed = numpy_seed

# --- pytorch seed ---
if is_package_available("torch"):
    # noinspection PyPackageRequirements,PyUnresolvedReferences
    import torch

    original_torch = torch.manual_seed

    def torch_seed(seed):
        """
        Record the seed with the current pads, then seed torch.

        Tracking is best effort: if it fails for any reason a warning is
        issued and seeding still takes place. Errors raised by the underlying
        seed function itself are not swallowed.

        :param seed: value forwarded to the original ``torch.manual_seed``
        :return: whatever the original seed function returns
        """
        import warnings

        try:
            from pypads.app.pypads import get_current_pads
            pads = get_current_pads()
            pads.cache.run_add("torch.seed", seed)
            log_random_seed("torch.seed")
        except Exception as e:
            # Merely constructing Warning(...) reports nothing; emit it for real.
            warnings.warn("Tracker failed to log the set seed because %s" % str(e))
        # Called exactly once and outside the try, so a seeding error is not
        # mistaken for a tracker failure.
        return original_torch(seed)