# specific language governing permissions and limitations
# under the License.
"""
Titanic data source.

The source is one of the main _formal_ forml components (together with `pipeline` and `evaluation`) looked up by the
forml loader. Here it is implemented as a plain python module, but it could equally be a package
`source/__init__.py` (to potentially split it into additional informal submodules).
"""

from forml.lib.flow.operator import cast
from forml.lib.schema.kaggle import titanic as schema
from forml.project import component

# Passenger columns used as model features; `Survived` is deliberately left out
# here because it is handed to the source query separately below.
FEATURES = schema.Passenger.select(
    schema.Passenger.Pclass,
    schema.Passenger.Name,
    schema.Passenger.Sex,
    schema.Passenger.Age,
    schema.Passenger.SibSp,
    schema.Passenger.Parch,
    schema.Passenger.Ticket,
    schema.Passenger.Fare,
    schema.Passenger.Cabin,
    schema.Passenger.Embarked,
)

# Source query over the features plus the `Survived` column, followed by the
# `cast.ndframe` operator parameterized with the schema of the feature selection.
ETL = (
    component.Source.query(FEATURES, schema.Passenger.Survived)
    >> cast.ndframe(FEATURES.schema)
)

# Register the source descriptor as the forml component of this module.
component.setup(ETL)
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Dummy project pipeline.
"""

from forml.lib.flow.actor import wrapped
from forml.lib.flow.operator.generic import simple
from forml.project import component


@simple.Mapper.operator
@wrapped.Function.actor
def noop():
    """Dummy transformer.

    The function body is intentionally empty; the function only exists to be wrapped by the
    ``wrapped.Function.actor`` and ``simple.Mapper.operator`` decorators.
    """


# Single instance of the dummy operator, registered as this module's forml component.
INSTANCE = noop()
component.setup(INSTANCE)
def test_setup():
    """Test the direct setup access.

    Calls ``compmod.setup`` with a bare ``object()`` instance; the test passes as long as the call does not raise.
    """
    placeholder = object()
    compmod.setup(placeholder)
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Titanic evaluation definition.

This is one of the main _formal_ forml components (along with `source` and `pipeline`) that's being looked up by
the forml loader.
"""

from sklearn import model_selection, metrics

from forml.project import component
from forml.lib.flow.operator.folding import evaluation

# Typical method of providing component implementation using `component.setup()`. Choosing the `MergingScorer`
# operator to implement classical crossvalidated metric scoring: a shuffled 2-fold stratified split (seeded so the
# fold assignment is reproducible) scored with the log-loss metric.
component.setup(
    evaluation.MergingScorer(
        crossvalidator=model_selection.StratifiedKFold(n_splits=2, shuffle=True, random_state=42),
        metric=metrics.log_loss,
    )
)
the forml loader. In this case it is implemented as a python package but it could be as well just a module `pipeline.py`. All the submodules of this packages have no semantic meaning for ForML - they are completely informal and have been created just for structuring the project code base splitting it into these particular parts with arbitrary names. """ from sklearn import model_selection from forml.lib.flow.operator.folding import ensemble from forml.project import component from titanic.pipeline import preprocessing, model # Stack of models implemented based on the forml lib ensembler supplied with standard sklearn Random Forest and # Gradient Boosting Classifiers using the sklearn StratifiedKFold crossvalidation splitter. STACK = ensemble.FullStacker( bases=(model.RFC(n_estimators=10, random_state=42), model.GBC(random_state=42)), crossvalidator=model_selection.StratifiedKFold(n_splits=2, shuffle=True, random_state=42), ) # This is the main pipeline composition: FLOW = (preprocessing.NaNImputer() >> preprocessing.parse_title( source='Name', target='Title') >> preprocessing.ENCODER( cols=['Name', 'Sex', 'Ticket', 'Cabin', 'Embarked', 'Title']) >> STACK >> model.LR(random_state=42, solver='lbfgs')) # And the final step is registering the pipeline instance as the forml component: component.setup(FLOW)