Example #1
import os

from metstab_shap.config import (parse_data_config, parse_model_config,
                                 parse_representation_config,
                                 parse_task_config)


def get_configs_and_model(folder_path):
    """Scan a results folder and return the parsed configs and the path to the pickled model."""
    configs = [os.path.join(folder_path, cfg) for cfg in os.listdir(folder_path) if 'cfg' in cfg]
    data_cfg = parse_data_config([dc for dc in configs if 'rat' in dc or 'human' in dc][0])
    repr_cfg = parse_representation_config([rc for rc in configs if 'maccs' in rc or 'morgan' in rc or 'krfp' in rc][0])
    task_cfg = parse_task_config([tc for tc in configs if 'regression' in tc or 'classification' in tc][0])
    model_cfg = parse_model_config([mc for mc in configs if 'nb.cfg' in mc or 'svm.cfg' in mc or 'trees.cfg' in mc][0])
    model_pickle = [os.path.join(folder_path, pkl) for pkl in os.listdir(folder_path) if 'model.pickle' in pkl][0]

    return data_cfg, repr_cfg, task_cfg, model_cfg, model_pickle
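
A minimal usage sketch (not from the original source): unpickle the stored model using the paths returned above. The results folder name is hypothetical.

import pickle

data_cfg, repr_cfg, task_cfg, model_cfg, model_pickle = \
    get_configs_and_model('results/human-krfp-classification-svm')
with open(model_pickle, 'rb') as f:
    model = pickle.load(f)  # the estimator saved by the training run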
Example #2
import numpy as np
import pandas as pd

from metstab_shap.grid import SVC_rbf
from metstab_shap.config import utils_section, csv_section
from metstab_shap.config import parse_data_config, parse_representation_config, parse_task_config
from metstab_shap.data import load_data

# load data (and change to classification if needed)
data_cfg = parse_data_config('configs/data/human.cfg')
repr_cfg = parse_representation_config('configs/repr/krfp.cfg')
task_cfg = parse_task_config('configs/task/classification.cfg')
x, y, _, test_x, test_y, smiles, test_smiles = load_data(
    data_cfg, **repr_cfg[utils_section])
# change y in case of classification
if 'classification' == task_cfg[utils_section]['task']:
    log_scale = data_cfg[csv_section]['scale'].lower().strip() == 'log'
    y = task_cfg[utils_section]['cutoffs'](y, log_scale)
    test_y = task_cfg[utils_section]['cutoffs'](test_y, log_scale)

training_features = x
training_target = y
testing_features = test_x

# Average CV score on the training set was: 0.8592512301458379
exported_pipeline = SVC_rbf(C=5.0,
                            cache_size=100,
                            gamma=0.01,
                            kernel="rbf",
                            max_iter=2000)  # original export truncated here; any further kwargs are lost

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
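
A hedged follow-up (not in the original export): score the refitted classifier on the held-out split; accuracy_score is a stand-in for whatever metric the project actually reports.

from sklearn.metrics import accuracy_score

print('test accuracy:', accuracy_score(test_y, results))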
Example #3
import pandas as pd
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import Binarizer, MinMaxScaler
from sklearn.tree import DecisionTreeRegressor
from tpot.builtins import StackingEstimator, ZeroCount
from tpot.export_utils import set_param_recursive

from metstab_shap.config import utils_section, csv_section
from metstab_shap.config import parse_data_config, parse_representation_config, parse_task_config
from metstab_shap.data import load_data

# load data (this example keeps the regression targets as-is)
data_cfg = parse_data_config('configs/data/rat.cfg')
repr_cfg = parse_representation_config('configs/repr/maccs.cfg')
task_cfg = parse_task_config('configs/task/regression.cfg')
x, y, _, test_x, test_y, smiles, test_smiles = load_data(
    data_cfg, **repr_cfg[utils_section])

training_features = x
training_target = y
testing_features = test_x

# Average CV score on the training set was: -0.15289999993179348
exported_pipeline = make_pipeline(
    ZeroCount(), MinMaxScaler(),
    StackingEstimator(estimator=DecisionTreeRegressor(max_depth=5,
                                                      max_features=0.25,
                                                      min_samples_leaf=3,
                                                      min_samples_split=14,
                                                      splitter="best")),
Example #4
import logging
import os
import signal
import sys

import neptune

# import locations below are assumed from the other examples; parse_tpot_config,
# save_configs and LoggerWrapper may live in different metstab_shap modules
from metstab_shap.config import (parse_data_config, parse_model_config,
                                 parse_representation_config,
                                 parse_task_config, parse_tpot_config,
                                 save_configs)
from metstab_shap.utils import LoggerWrapper

if __name__ == '__main__':
    saving_dir = sys.argv[6]
    try:
        os.makedirs(saving_dir)
    except FileExistsError:
        pass

    # set up the logger (everything that goes through the logger or stderr is saved to a file and echoed to stdout)
    logger_wrapper = LoggerWrapper(saving_dir)
    sys.stderr.write = logger_wrapper.log_errors
    logger_wrapper.logger.info(f'Running {sys.argv[1:-1]}')

    # Load configs
    model_cfg = parse_model_config(sys.argv[1])
    data_cfg = parse_data_config(sys.argv[2])
    repr_cfg = parse_representation_config(sys.argv[3])
    task_cfg = parse_task_config(sys.argv[4])
    tpot_cfg = parse_tpot_config(sys.argv[5])
    save_configs(sys.argv[1:-1], saving_dir)

    # Nicely handle interruptions coming from the Neptune UI
    def neptune_aborter():
        # aborting from the UI sends SIGINT, so TPOT stops and saves the best pipeline found so far
        logging.getLogger('').info("neptune_aborter: sending Ctrl + C.")
        os.kill(os.getpid(), signal.SIGINT)

    # nexp = None  # uncomment if you don't want to use Neptune
    # leave the code below uncommented if you want to use Neptune
    nexp = neptune.create_experiment(
        name=saving_dir,
        params={
            # the original snippet is truncated here; the data config path is
            # a plausible stand-in for the logged dataset parameter
            'dataset': sys.argv[2],
        },
        abort_callback=neptune_aborter,  # lets aborting from the UI trigger neptune_aborter
    )
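
A hypothetical continuation (the original snippet ends inside the create_experiment call): once the experiment is open, the script would typically launch the TPOT search, which the SIGINT raised by neptune_aborter stops gracefully. The class name, config unpacking, and export path below are illustrative assumptions, not taken from the source.

    from tpot import TPOTClassifier  # regression runs would use TPOTRegressor

    tpot = TPOTClassifier(**tpot_cfg[utils_section])  # assumed: the tpot config maps onto TPOT kwargs
    tpot.fit(x, y)  # assumes x, y were loaded as in Examples #2 and #3
    tpot.export(os.path.join(saving_dir, 'exported_pipeline.py'))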