예제 #1
0
def main(inputs_path,
         output_obj,
         base_paths=None,
         meta_path=None,
         outfile_params=None):
    """
    Build an ensemble estimator from Galaxy tool parameters and pickle it.

    The ensemble class is resolved from
    ``params['algo_selection']['estimator_type']``, a string of the form
    ``'<module>_<ClassName>'`` whose module must already be imported.

    Parameters
    ----------
    inputs_path : str
        File path for Galaxy parameters (JSON).

    output_obj : str
        File path for ensemble estimator output (pickle).

    base_paths : str or None
        File path or paths concatenated by comma. An entry that is empty
        or the literal string ``'None'`` means the base estimator at that
        position is built from ``params['base_est_builder'][idx]``
        instead of being loaded from a file. If the whole argument is
        None, every base estimator is built from
        ``params['base_est_builder']``.

    meta_path : str or None
        File path of a saved meta estimator. Only consulted when the
        estimator type starts with ``'mlxtend'``; when falsy, the meta
        estimator is built from the tool parameters.

    outfile_params : str or None
        File path for params output (tab-separated). Written only when
        ``params['get_params']`` is truthy.
    """
    with open(inputs_path, 'r') as param_handler:
        params = json.load(param_handler)

    estimator_type = params['algo_selection']['estimator_type']

    # `base_paths` defaults to None; calling `.split` on it would raise
    # AttributeError. Treat None as "no pre-saved models": one 'None'
    # placeholder per configured base estimator.
    if base_paths is None:
        base_files = ['None'] * len(params['base_est_builder'])
    else:
        base_files = base_paths.split(',')

    # get base estimators
    base_estimators = []
    for idx, base_file in enumerate(base_files):
        if base_file and base_file != 'None':
            with open(base_file, 'rb') as handler:
                model = load_model(handler)
        else:
            estimator_json = (
                params['base_est_builder'][idx]['estimator_selector'])
            model = get_estimator(estimator_json)

        if estimator_type.startswith('sklearn'):
            # sklearn-type ensembles receive (name, estimator) pairs;
            # the index keeps names unique.
            named = model.__class__.__name__.lower()
            named = 'base_%d_%s' % (idx, named)
            base_estimators.append((named, model))
        else:
            base_estimators.append(model)

    # get meta estimator, if applicable
    if estimator_type.startswith('mlxtend'):
        if meta_path:
            with open(meta_path, 'rb') as f:
                meta_estimator = load_model(f)
        else:
            estimator_json = (params['algo_selection']['meta_estimator']
                              ['estimator_selector'])
            meta_estimator = get_estimator(estimator_json)

    options = params['algo_selection']['options']

    cv_selector = options.pop('cv_selector', None)
    if cv_selector:
        # only the splitter is needed here; the second value is unused
        splitter, _ = get_cv(cv_selector)
        options['cv'] = splitter
        # set n_jobs
        options['n_jobs'] = N_JOBS

    # `weights` arrives as a string holding a Python literal; it is only
    # forwarded when it parses to a truthy value.
    weights = options.pop('weights', None)
    if weights:
        weights = ast.literal_eval(weights)
        if weights:
            options['weights'] = weights

    # '<module>_<ClassName>' -> class object from the imported module
    mod_and_name = estimator_type.split('_')
    mod = sys.modules[mod_and_name[0]]
    klass = getattr(mod, mod_and_name[1])

    if estimator_type.startswith('sklearn'):
        options['n_jobs'] = N_JOBS
        ensemble_estimator = klass(base_estimators, **options)

    elif mod == mlxtend.classifier:
        ensemble_estimator = klass(classifiers=base_estimators,
                                   meta_classifier=meta_estimator,
                                   **options)

    else:
        ensemble_estimator = klass(regressors=base_estimators,
                                   meta_regressor=meta_estimator,
                                   **options)

    print(ensemble_estimator)
    for base_est in base_estimators:
        print(base_est)

    with open(output_obj, 'wb') as out_handler:
        pickle.dump(ensemble_estimator, out_handler, pickle.HIGHEST_PROTOCOL)

    if params['get_params'] and outfile_params:
        results = get_search_params(ensemble_estimator)
        df = pd.DataFrame(results, columns=['', 'Parameter', 'Value'])
        df.to_csv(outfile_params, sep='\t', index=False)
예제 #2
0
def build_keras_model(
    inputs,
    outfile,
    model_json,
    infile_weights=None,
    batch_mode=False,
    outfile_params=None,
):
    """
    Build the estimator for the `keras_model_builder` tool and pickle it.

    In "prefitted" mode the Keras model is rebuilt from its JSON config
    and the weights are loaded from `infile_weights`. Otherwise a wrapper
    class named by ``inputs["mode_selection"]["learning_type"]`` is looked
    up in ``galaxy_ml.keras_galaxy_models`` and instantiated with the
    compile/fit options taken from `inputs`.

    Parameters
    ----------
    inputs : dict
        loaded galaxy tool parameters from `keras_model_builder` tool.
    outfile : str
        Path to galaxy dataset containing the keras_galaxy model output.
    model_json : str
        Path to dataset containing keras model JSON.
    infile_weights : str or None
        If string, path to dataset containing model weights. Required
        when the mode type is "prefitted".
    batch_mode : bool, default=False
        Whether to build online batch classifier.
    outfile_params : str, default=None
        File path to search parameters output. Only written in the
        non-prefitted mode.

    Raises
    ------
    ValueError
        If the model JSON's ``class_name`` is neither "Sequential" nor
        "Model", or if prefitted mode is requested without
        `infile_weights`.
    """
    with open(model_json, "r") as f:
        json_model = json.load(f)

    config = json_model["config"]

    options = {}

    if json_model["class_name"] == "Sequential":
        options["model_type"] = "sequential"
        klass = Sequential
    elif json_model["class_name"] == "Model":
        options["model_type"] = "functional"
        klass = Model
    else:
        raise ValueError("Unknow Keras model class: %s" %
                         json_model["class_name"])

    mode = inputs["mode_selection"]

    # load prefitted model
    if mode["mode_type"] == "prefitted":
        # Fail early with a clear message instead of an opaque error from
        # inside `load_weights(None)`.
        if infile_weights is None:
            raise ValueError(
                "`infile_weights` is required when mode_type is 'prefitted'.")
        estimator = klass.from_config(config)
        estimator.load_weights(infile_weights)
    # build train model
    else:
        cls_name = mode["learning_type"]
        klass = try_get_attr("galaxy_ml.keras_galaxy_models", cls_name)

        compile_params = mode["compile_params"]
        options["loss"] = compile_params["loss"]

        optimizer_selection = compile_params["optimizer_selection"]
        options["optimizer"] = optimizer_selection["optimizer_type"].lower()
        options.update(optimizer_selection["optimizer_options"])

        # An empty or missing selection (`[]` / None) would make
        # `train_metrics[-1]` raise; treat it as "no metrics".
        train_metrics = compile_params["metrics"] or []
        # a trailing "none" entry is a placeholder, not a metric
        if train_metrics and train_metrics[-1] == "none":
            train_metrics = train_metrics[:-1]
        options["metrics"] = train_metrics

        options.update(mode["fit_params"])
        options["seed"] = mode["random_seed"]

        if batch_mode:
            generator = get_batch_generator(mode["generator_selection"])
            options["data_batch_generator"] = generator
            options["prediction_steps"] = mode["prediction_steps"]
            options["class_positive_factor"] = mode["class_positive_factor"]
        estimator = klass(config, **options)
        if outfile_params:
            hyper_params = get_search_params(estimator)
            # TODO: remove this after making `verbose` tunable
            # Flag every `*verbose` parameter row with "@" in column one.
            for h_param in hyper_params:
                if h_param[1].endswith("verbose"):
                    h_param[0] = "@"
            df = pd.DataFrame(hyper_params, columns=["", "Parameter", "Value"])
            df.to_csv(outfile_params, sep="\t", index=False)

    print(repr(estimator))
    # save model by pickle
    with open(outfile, "wb") as f:
        pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL)
예제 #3
0
def build_keras_model(inputs,
                      outfile,
                      model_json,
                      infile_weights=None,
                      batch_mode=False,
                      outfile_params=None):
    """ Create the estimator for the `keras_model_builder` tool and pickle it.

    Parameters
    ----------
    inputs : dict
        Galaxy tool parameters loaded from the `keras_model_builder` tool.
    outfile : str
        Path of the galaxy dataset the pickled estimator is written to.
    model_json : str
        Path to the dataset holding the keras model JSON.
    infile_weights : str or None
        Path to the dataset with model weights; passed to `load_weights`
        in the "prefitted" mode.
    batch_mode : bool, default=False
        Whether to build online batch classifier.
    outfile_params : str, default=None
        File path for the tab-separated search parameters output; only
        written when a trainable estimator is built.

    Raises
    ------
    ValueError
        If the JSON `class_name` is neither 'Sequential' nor 'Model'.
    """
    with open(model_json, 'r') as json_handle:
        json_model = json.load(json_handle)

    config = json_model['config']
    model_class_name = json_model['class_name']

    # Reject unsupported model classes before anything else is resolved.
    if model_class_name not in ('Sequential', 'Model'):
        raise ValueError("Unknow Keras model class: %s" % model_class_name)

    if model_class_name == 'Sequential':
        klass, model_type = Sequential, 'sequential'
    else:
        klass, model_type = Model, 'functional'
    options = {'model_type': model_type}

    mode = inputs['mode_selection']

    if mode['mode_type'] == 'prefitted':
        # Rebuild the plain keras model and restore its weights.
        estimator = klass.from_config(config)
        estimator.load_weights(infile_weights)
    else:
        # Trainable estimator: swap in the wrapper class named by the tool.
        klass = try_get_attr('galaxy_ml.keras_galaxy_models',
                             mode['learning_type'])

        compile_params = mode['compile_params']
        options['loss'] = compile_params['loss']

        optimizer = compile_params['optimizer_selection']
        options['optimizer'] = optimizer['optimizer_type'].lower()
        options.update(optimizer['optimizer_options'])

        # Metrics come in as a comma-separated string; drop a trailing
        # 'none' placeholder.
        metric_names = compile_params['metrics'].split(',')
        if metric_names[-1] == 'none':
            metric_names.pop()
        options['metrics'] = metric_names

        options.update(mode['fit_params'])
        options['seed'] = mode['random_seed']

        if batch_mode:
            options.update({
                'data_batch_generator': get_batch_generator(
                    mode['generator_selection']),
                'prediction_steps': mode['prediction_steps'],
                'class_positive_factor': mode['class_positive_factor'],
            })

        estimator = klass(config, **options)

        if outfile_params:
            hyper_params = get_search_params(estimator)
            # TODO: remove this after making `verbose` tunable
            for row in hyper_params:
                if row[1].endswith('verbose'):
                    row[0] = '@'
            pd.DataFrame(
                hyper_params, columns=['', 'Parameter', 'Value']
            ).to_csv(outfile_params, sep='\t', index=False)

    print(repr(estimator))
    # Persist the estimator with the highest pickle protocol.
    with open(outfile, 'wb') as pickle_handle:
        pickle.dump(estimator, pickle_handle, pickle.HIGHEST_PROTOCOL)
예제 #4
0
def main(inputs_path,
         output_obj,
         base_paths=None,
         meta_path=None,
         outfile_params=None):
    """
    Assemble an ensemble estimator from Galaxy parameters and pickle it.

    Parameters
    ----------
    inputs_path : str
        File path for Galaxy parameters (JSON).

    output_obj : str
        File path the pickled ensemble estimator is written to.

    base_paths : str
        File path or paths concatenated by comma. Entries that are empty
        or the string "None" are built from the tool parameters instead
        of being loaded from a file.

    meta_path : str
        File path of a saved meta estimator (used for "mlxtend" types).

    outfile_params : str
        File path for the tab-separated params output.
    """
    with open(inputs_path, "r") as json_file:
        params = json.load(json_file)

    estimator_type = params["algo_selection"]["estimator_type"]
    uses_sklearn = estimator_type.startswith("sklearn")

    # Collect the base estimators, one per comma-separated entry.
    base_estimators = []
    for position, path in enumerate(base_paths.split(",")):
        if not path or path == "None":
            selector = params["base_est_builder"][position][
                "estimator_selector"]
            model = get_estimator(selector)
        else:
            with open(path, "rb") as model_file:
                model = load_model(model_file)

        if uses_sklearn:
            # sklearn-type ensembles take (name, estimator) pairs
            label = "base_%d_%s" % (position,
                                    model.__class__.__name__.lower())
            entry = (label, model)
        else:
            entry = model
        base_estimators.append(entry)

    # The meta estimator is only resolved for mlxtend-type ensembles.
    if estimator_type.startswith("mlxtend"):
        if not meta_path:
            selector = params["algo_selection"]["meta_estimator"][
                "estimator_selector"]
            meta_estimator = get_estimator(selector)
        else:
            with open(meta_path, "rb") as meta_file:
                meta_estimator = load_model(meta_file)

    options = params["algo_selection"]["options"]

    cv_selector = options.pop("cv_selector", None)
    if cv_selector:
        cv_splitter, _ = get_cv(cv_selector)
        options["cv"] = cv_splitter
        options["n_jobs"] = N_JOBS

    # Weights arrive as a literal string; keep them only if non-empty.
    raw_weights = options.pop("weights", None)
    parsed_weights = ast.literal_eval(raw_weights) if raw_weights else None
    if parsed_weights:
        options["weights"] = parsed_weights

    # "<module>_<ClassName>" -> class looked up in the imported module
    type_parts = estimator_type.split("_")
    mod = sys.modules[type_parts[0]]
    klass = getattr(mod, type_parts[1])

    if uses_sklearn:
        options["n_jobs"] = N_JOBS
        ensemble_estimator = klass(base_estimators, **options)
    else:
        if mod == mlxtend.classifier:
            member_kwargs = {"classifiers": base_estimators,
                             "meta_classifier": meta_estimator}
        else:
            member_kwargs = {"regressors": base_estimators,
                             "meta_regressor": meta_estimator}
        ensemble_estimator = klass(**member_kwargs, **options)

    print(ensemble_estimator)
    for member in base_estimators:
        print(member)

    with open(output_obj, "wb") as pickle_file:
        pickle.dump(ensemble_estimator, pickle_file, pickle.HIGHEST_PROTOCOL)

    if params["get_params"] and outfile_params:
        search_params = get_search_params(ensemble_estimator)
        pd.DataFrame(
            search_params, columns=["", "Parameter", "Value"]
        ).to_csv(outfile_params, sep="\t", index=False)