Esempio n. 1
0
# specific language governing permissions and limitations
# under the License.

"""
Titanic data source.

This is one of the main _formal_ forml components (along with `pipeline` and `evaluation`) that's being looked up by
the forml loader. In this case it is implemented as a Python module, but it could just as well be a package
`source/__init__.py` (to potentially split it into additional informal submodules).
"""

from forml.lib.flow.operator import cast
from forml.lib.schema.kaggle import titanic as schema
from forml.project import component

# Selection of the `Passenger` schema columns used as features. `Survived` is not part of this selection - it is
# passed to the source query separately.
FEATURES = schema.Passenger.select(
    *(
        getattr(schema.Passenger, column)
        for column in (
            'Pclass',
            'Name',
            'Sex',
            'Age',
            'SibSp',
            'Parch',
            'Ticket',
            'Fare',
            'Cabin',
            'Embarked',
        )
    )
)

# Source query over FEATURES with `Survived` as the second argument (presumably the label column - confirm against
# the `component.Source.query` signature), composed with an `ndframe` cast built from the FEATURES schema.
ETL = (
    component.Source.query(FEATURES, schema.Passenger.Survived)
    >> cast.ndframe(FEATURES.schema)
)
# Hand the composed source over to forml as this module's component.
component.setup(ETL)
Esempio n. 2
0
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Dummy project pipeline.
"""
from forml.lib.flow.actor import wrapped
from forml.lib.flow.operator.generic import simple
from forml.project import component


# Decorators apply bottom-up: the function is first wrapped by `wrapped.Function.actor` and the result is then
# passed to `simple.Mapper.operator`.
@simple.Mapper.operator
@wrapped.Function.actor
def noop():
    """Placeholder transformer with an empty body; takes no arguments and returns ``None``."""
    return None


# Create the dummy operator instance and pass it to `component.setup()` so that it becomes the component
# provided by this module.
INSTANCE = noop()
component.setup(INSTANCE)
Esempio n. 3
0
def test_setup():
    """Call ``compmod.setup`` directly with a plain ``object`` instance.

    There is no explicit assertion; the test passes as long as the call does not raise.
    """
    placeholder = object()
    compmod.setup(placeholder)
Esempio n. 4
0
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Titanic evaluation definition.

This is one of the main _formal_ forml components (along with `source` and `pipeline`) that's being looked up by
the forml loader.
"""

from sklearn import model_selection, metrics

from forml.project import component
from forml.lib.flow.operator.folding import evaluation

# The component implementation is provided the typical way - through `component.setup()`. The `MergingScorer`
# operator is used to implement classical crossvalidated metric scoring: a 2-split shuffled `StratifiedKFold`
# with a fixed random state as the crossvalidator and `log_loss` as the metric.
component.setup(
    evaluation.MergingScorer(
        crossvalidator=model_selection.StratifiedKFold(n_splits=2, shuffle=True, random_state=42),
        metric=metrics.log_loss,
    )
)
Esempio n. 5
0
the forml loader. In this case it is implemented as a Python package, but it could just as well be a single module
`pipeline.py`.

All the submodules of this package have no semantic meaning for ForML - they are completely informal and have been
created just for structuring the project code base splitting it into these particular parts with arbitrary names.
"""
from sklearn import model_selection

from forml.lib.flow.operator.folding import ensemble
from forml.project import component
from titanic.pipeline import preprocessing, model

# Model stack built with the forml lib `FullStacker` ensembler. The base models are the sklearn Random Forest
# (`model.RFC`) and Gradient Boosting (`model.GBC`) classifiers; folds come from the sklearn `StratifiedKFold`
# crossvalidation splitter (2 splits, shuffled, fixed random state).
STACK = ensemble.FullStacker(
    bases=(
        model.RFC(n_estimators=10, random_state=42),
        model.GBC(random_state=42),
    ),
    crossvalidator=model_selection.StratifiedKFold(
        n_splits=2,
        shuffle=True,
        random_state=42,
    ),
)

# Main pipeline composition. The steps are chained left-to-right with `>>`, one step per line: `NaNImputer`,
# `parse_title` (source column `Name`, target column `Title`), `ENCODER` over the listed columns, the model
# `STACK` and finally `model.LR`.
FLOW = (
    preprocessing.NaNImputer()
    >> preprocessing.parse_title(source='Name', target='Title')
    >> preprocessing.ENCODER(cols=['Name', 'Sex', 'Ticket', 'Cabin', 'Embarked', 'Title'])
    >> STACK
    >> model.LR(random_state=42, solver='lbfgs')
)

# Final step: register the pipeline instance as the forml component.
component.setup(FLOW)