def compose_chain(self, data: InputData,
                  initial_chain: Optional[Chain],
                  composer_requirements: ComposerRequirements,
                  metrics: Optional[Callable],
                  optimiser_parameters=None,
                  is_visualise: bool = False) -> Chain:
    """Assemble a fixed-topology chain from the requested model types.

    The layout is selected by ``self.dummy_chain_type``:

    * hierarchical -- one primary node per entry of
      ``composer_requirements.primary``, all of them feeding a single
      secondary node built from ``composer_requirements.secondary[0]``;
    * flat -- a primary node built from ``composer_requirements.primary[0]``
      followed by a linear sequence of secondary nodes, one per entry of
      ``composer_requirements.secondary``.

    ``data``, ``initial_chain``, ``metrics``, ``optimiser_parameters`` and
    ``is_visualise`` are accepted but not read by this implementation.

    :return: the newly built chain
    :raises NotImplementedError: if ``self.dummy_chain_type`` is neither
        hierarchical nor flat
    """
    result = Chain()
    chain_type = self.dummy_chain_type

    if chain_type == DummyChainTypeEnum.hierarchical:
        # (y1, y2) -> y
        root = NodeGenerator.secondary_node(composer_requirements.secondary[0])
        for primary_model in composer_requirements.primary:
            leaf = NodeGenerator.primary_node(primary_model)
            result.add_node(leaf)
            root.nodes_from.append(leaf)
        # The root goes in last, after all of its parents.
        result.add_node(root)
        return result

    if chain_type == DummyChainTypeEnum.flat:
        # (y1) -> (y2) -> y
        tail = NodeGenerator.primary_node(composer_requirements.primary[0])
        result.add_node(tail)
        for secondary_model in composer_requirements.secondary:
            link = NodeGenerator.secondary_node(secondary_model)
            # Each new node consumes only the node created just before it.
            link.nodes_from = [tail]
            result.add_node(link)
            tail = link
        return result

    raise NotImplementedError()
def test_secondary_nodes_is_invariant_to_inputs_order(data_setup):
    """Check that a secondary node's output ignores the order of its parents.

    Two chains with the same models are built: one lists the parents as
    (logit, lda, knn), the other uses deep copies listed as (knn, logit, lda)
    and added to the chain in a different order. Fit and predict outputs must
    match, and must still match after the parents of the already fitted root
    are permuted and the chain is refitted.
    """
    train, test = train_test_data_setup(data_setup)

    logit_node, lda_node, knn_node = (
        NodeGenerator.primary_node(model_type=model)
        for model in (ModelTypesIdsEnum.logit,
                      ModelTypesIdsEnum.lda,
                      ModelTypesIdsEnum.knn)
    )
    root = NodeGenerator.secondary_node(
        model_type=ModelTypesIdsEnum.xgboost,
        nodes_from=[logit_node, lda_node, knn_node])

    chain = Chain()
    for node in (logit_node, lda_node, knn_node, root):
        chain.add_node(node)

    # Independent copies of the primary nodes for the shuffled chain.
    logit_copy = deepcopy(logit_node)
    lda_copy = deepcopy(lda_node)
    knn_copy = deepcopy(knn_node)

    root_shuffled = NodeGenerator.secondary_node(
        model_type=ModelTypesIdsEnum.xgboost,
        nodes_from=[knn_copy, logit_copy, lda_copy])

    chain_shuffled = Chain()
    # change order of nodes in list
    for node in (root_shuffled, knn_copy, logit_copy, lda_copy):
        chain_shuffled.add_node(node)

    train_predicted = chain.fit(input_data=train)
    train_predicted_shuffled = chain_shuffled.fit(input_data=train)

    # train results should be invariant
    reference_id = chain.root_node.descriptive_id
    assert reference_id == chain_shuffled.root_node.descriptive_id
    assert all(
        np.equal(train_predicted.predict, train_predicted_shuffled.predict))

    test_predicted = chain.predict(input_data=test)
    test_predicted_shuffled = chain_shuffled.predict(input_data=test)

    # predict results should be invariant
    assert all(
        np.equal(test_predicted.predict, test_predicted_shuffled.predict))

    # change parents order for the nodes fitted chain
    fitted_root = chain.nodes[3]
    parents = fitted_root.nodes_from
    fitted_root.nodes_from = [parents[2], parents[0], parents[1]]
    # Clear the root's cache before the chain is fitted again.
    fitted_root.cache.clear()
    chain.fit(train)
    test_predicted_re_shuffled = chain.predict(input_data=test)

    # predict results should be invariant
    assert all(
        np.equal(test_predicted.predict, test_predicted_re_shuffled.predict))
def test_chain_hierarchy_fit_correct(data_setup):
    """Fit a diamond-shaped chain of logit nodes and check its structure.

    One primary node feeds two secondary nodes, which both feed the final
    node. The test checks the root's descriptive id, the chain length and
    depth, and that the prediction shape matches the training target shape.
    """
    train, _ = train_test_data_setup(data_setup)
    logit = ModelTypesIdsEnum.logit

    base = NodeGenerator.primary_node(model_type=logit)
    left = NodeGenerator.secondary_node(model_type=logit, nodes_from=[base])
    right = NodeGenerator.secondary_node(model_type=logit, nodes_from=[base])
    top = NodeGenerator.secondary_node(model_type=logit,
                                       nodes_from=[left, right])

    chain = Chain()
    for node in (base, left, right, top):
        chain.add_node(node)

    train_predicted = chain.fit(input_data=train, use_cache=False)

    expected_id = (
        '((/n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.logit_defaultparams;;(/'
        'n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.logit_defaultparams'
    )
    assert chain.root_node.descriptive_id == expected_id

    assert chain.length == 4
    assert chain.depth == 3
    assert train_predicted.predict.shape == train.target.shape
def baseline_chain():
    """Return a two-level chain: an xgboost node fed by knn and logit nodes."""
    result = Chain()
    root = NodeGenerator.secondary_node(model_type=ModelTypesIdsEnum.xgboost,
                                        nodes_from=[])

    for leaf_model in (ModelTypesIdsEnum.knn, ModelTypesIdsEnum.logit):
        leaf = NodeGenerator.primary_node(leaf_model)
        result.add_node(leaf)
        root.nodes_from.append(leaf)

    # The root is added after both of its parents.
    result.add_node(root)
    return result
def chain_with_self_cycle():
    """Return a chain whose secondary node lists itself among its parents."""
    leaf = NodeGenerator.primary_node(model_type=ModelTypesIdsEnum.logit)
    looped = NodeGenerator.secondary_node(model_type=ModelTypesIdsEnum.logit,
                                          nodes_from=[leaf])
    # Make the node its own parent to create the self-cycle.
    looped.nodes_from.append(looped)

    chain = Chain()
    for node in (leaf, looped):
        chain.add_node(node)
    return chain
def chain_fourth():
    """Return the chain from ``chain_first()`` with two grandchildren replaced.

    Intended shape, as sketched by the original author:

          XG
       |      \\
      XG      KNN
      |  \\     |  \\
     QDA  KNN  LR  LDA
     | \\   |  \\
     RF RF KNN KNN

    NOTE(review): the left-to-right order of the QDA and KNN subtrees depends
    on how ``replace_node_with_parents`` positions the replacement -- confirm
    against its implementation.
    """

    def build_subtree(root_model, leaf_models):
        # A secondary node fed by one fresh primary node per leaf model.
        subtree_root = NodeGenerator.secondary_node(root_model)
        for leaf_model in leaf_models:
            subtree_root.nodes_from.append(
                NodeGenerator.primary_node(leaf_model))
        return subtree_root

    chain = chain_first()

    qda_subtree = build_subtree(ModelTypesIdsEnum.qda,
                                (ModelTypesIdsEnum.rf, ModelTypesIdsEnum.rf))
    chain.replace_node_with_parents(
        chain.root_node.nodes_from[0].nodes_from[1], qda_subtree)

    knn_subtree = build_subtree(ModelTypesIdsEnum.knn,
                                (ModelTypesIdsEnum.knn, ModelTypesIdsEnum.knn))
    chain.replace_node_with_parents(
        chain.root_node.nodes_from[0].nodes_from[0], knn_subtree)

    return chain
def chain_third():
    """Return a two-level chain: a QDA node fed by two random-forest nodes.

        QDA
       |   \\
      RF    RF

    The secondary (QDA) node is added to the chain first, followed by its
    parents in ``nodes_from`` order.
    """
    chain = Chain()
    new_node = NodeGenerator.secondary_node(ModelTypesIdsEnum.qda)
    for model_type in (ModelTypesIdsEnum.rf, ModelTypesIdsEnum.rf):
        new_node.nodes_from.append(NodeGenerator.primary_node(model_type))
    chain.add_node(new_node)
    # A plain loop: the calls are made for their side effect only, so there is
    # no reason to collect their return values in a throwaway list.
    for node_from in new_node.nodes_from:
        chain.add_node(node_from)
    return chain
def chain_second():
    """Return ``chain_first()`` with the root's first parent replaced by a DT subtree.

    Intended shape, as sketched by the original author:

           XG
        |      \\
       DT      KNN
       |  \\     |  \\
      KNN KNN  LR  LDA
    """
    chain = chain_first()

    dt_subtree = NodeGenerator.secondary_node(ModelTypesIdsEnum.dt)
    for leaf_model in (ModelTypesIdsEnum.knn, ModelTypesIdsEnum.knn):
        leaf = NodeGenerator.primary_node(leaf_model)
        dt_subtree.nodes_from.append(leaf)

    target = chain.root_node.nodes_from[0]
    chain.replace_node_with_parents(target, dt_subtree)
    return chain
def chain_with_cycle():
    """Return a chain in which two secondary nodes are each other's parents."""
    logit = ModelTypesIdsEnum.logit

    leaf = NodeGenerator.primary_node(model_type=logit)
    middle = NodeGenerator.secondary_node(model_type=logit, nodes_from=[leaf])
    top = NodeGenerator.secondary_node(model_type=logit,
                                       nodes_from=[middle, leaf])
    # Close the loop: ``top`` already consumes ``middle``.
    middle.nodes_from.append(top)

    chain = Chain()
    for node in (leaf, middle, top):
        chain.add_node(node)
    return chain
def chain_with_multiple_roots():
    """Return a chain with two secondary nodes that share one primary parent.

    Neither secondary node is a parent of any other node, so the chain has
    two terminal nodes.
    """
    logit = ModelTypesIdsEnum.logit

    shared_leaf = NodeGenerator.primary_node(model_type=logit)
    root_a = NodeGenerator.secondary_node(model_type=logit,
                                          nodes_from=[shared_leaf])
    root_b = NodeGenerator.secondary_node(model_type=logit,
                                          nodes_from=[shared_leaf])

    chain = Chain()
    for node in (shared_leaf, root_a, root_b):
        chain.add_node(node)
    return chain
def default_valid_chain():
    """Return a diamond-shaped chain of logit nodes.

    One primary node feeds two secondary nodes, and both of those feed the
    final secondary node.
    """
    logit = ModelTypesIdsEnum.logit

    base = NodeGenerator.primary_node(model_type=logit)
    left = NodeGenerator.secondary_node(model_type=logit, nodes_from=[base])
    right = NodeGenerator.secondary_node(model_type=logit, nodes_from=[base])
    top = NodeGenerator.secondary_node(model_type=logit,
                                       nodes_from=[left, right])

    chain = Chain()
    for node in (base, left, right, top):
        chain.add_node(node)
    return chain
def chain_fifth():
    """Return ``chain_first()`` with its root and two leaves swapped for KNN nodes.

    Intended shape, as sketched by the original author:

          KNN
        |     \\
       XG      KNN
       | \\     |  \\
      LR LDA  KNN  KNN
    """
    chain = chain_first()

    knn_root = NodeGenerator.secondary_node(ModelTypesIdsEnum.knn)
    chain.update_node(chain.root_node, knn_root)

    # The same primary node instance is passed to both leaf updates.
    knn_leaf = NodeGenerator.primary_node(ModelTypesIdsEnum.knn)
    for leaf_index in (0, 1):
        chain.update_node(
            chain.root_node.nodes_from[1].nodes_from[leaf_index], knn_leaf)

    return chain
def chain_with_isolated_nodes():
    """Return a three-node linear chain plus one unconnected secondary node.

    The extra node has an empty ``nodes_from`` list and is not a parent of
    any other node.
    """
    logit = ModelTypesIdsEnum.logit

    leaf = NodeGenerator.primary_node(model_type=logit)
    middle = NodeGenerator.secondary_node(model_type=logit, nodes_from=[leaf])
    top = NodeGenerator.secondary_node(model_type=logit, nodes_from=[middle])
    orphan = NodeGenerator.secondary_node(model_type=logit, nodes_from=[])

    chain = Chain()
    for node in (leaf, middle, top, orphan):
        chain.add_node(node)
    return chain
def chain_third():
    """Return a two-level chain: an xgboost node fed by KNN, LDA and KNN nodes.

           XG
        |  |   \\
      KNN LDA  KNN

    The primary nodes are added to the chain first, then the xgboost node.
    """
    tree_root = NodeGenerator.secondary_node(ModelTypesIdsEnum.xgboost)
    leaf_models = (ModelTypesIdsEnum.knn,
                   ModelTypesIdsEnum.lda,
                   ModelTypesIdsEnum.knn)
    for leaf_model in leaf_models:
        leaf = NodeGenerator.primary_node(leaf_model)
        tree_root.nodes_from.append(leaf)

    chain = Chain()
    for parent in tree_root.nodes_from:
        chain.add_node(parent)
    chain.add_node(tree_root)
    return chain
def chain_second():
    """Return ``chain_first()`` with one grandchild replaced by an XG subtree.

    Intended shape, as sketched by the original author:

           XG
        |      \\
       XG      KNN
       | \\      |  \\
      LR  XG   LR   LDA
          |  \\
         KNN  LDA
    """
    xg_subtree = NodeGenerator.secondary_node(ModelTypesIdsEnum.xgboost)
    for leaf_model in (ModelTypesIdsEnum.knn, ModelTypesIdsEnum.lda):
        leaf = NodeGenerator.primary_node(leaf_model)
        xg_subtree.nodes_from.append(leaf)

    chain = chain_first()
    # The second parent of the root's first parent is the node being replaced.
    target = chain.root_node.nodes_from[0].nodes_from[1]
    chain.replace_node_with_parents(target, xg_subtree)
    return chain
def chain_fourth():
    """Return ``chain_third()`` with the root's second parent replaced by an XG subtree.

    Intended shape, as sketched by the original author:

           XG
        |   \\   \\
       KNN  XG  KNN
            |  \\
           KNN  KNN
    """
    chain = chain_third()
    new_node = NodeGenerator.secondary_node(ModelTypesIdsEnum.xgboost)
    # A plain loop: ``append`` is called for its side effect only, so there is
    # no reason to collect its ``None`` results in a throwaway list.
    for _ in range(2):
        new_node.nodes_from.append(
            NodeGenerator.primary_node(ModelTypesIdsEnum.knn))
    chain.replace_node_with_parents(chain.root_node.nodes_from[1], new_node)
    return chain
def test_eval_strategy_logreg(data_setup):
    """Compare a logit primary node with a directly fitted LogisticRegression.

    Only the number of predictions is compared, not their values.
    """
    train, test = train_test_data_setup(data=data_setup)

    reference_params = dict(C=10., random_state=1, solver='liblinear',
                            max_iter=10000, verbose=0)
    reference_model = LogisticRegression(**reference_params)
    reference_model.fit(train.features, train.target)
    expected_result = reference_model.predict(test.features)

    logit_node = NodeGenerator.primary_node(
        model_type=ModelTypesIdsEnum.logit)
    logit_node.fit(input_data=train)
    actual_result = logit_node.predict(input_data=test)

    assert len(actual_result.predict) == len(expected_result)
def test_nodes_sequence_fit_correct(data_fixture, request):
    """Fit a diamond of nodes through its final node and check the outcome.

    A logit primary node feeds an lda and a qda secondary node, which both
    feed the final knn node. The test checks the final node's descriptive id,
    the prediction shape, and that the final node's cache holds a state after
    fitting.
    """
    dataset = request.getfixturevalue(data_fixture)
    train, _ = train_test_data_setup(dataset)

    base = NodeGenerator.primary_node(model_type=ModelTypesIdsEnum.logit)
    lda_branch = NodeGenerator.secondary_node(
        model_type=ModelTypesIdsEnum.lda, nodes_from=[base])
    qda_branch = NodeGenerator.secondary_node(
        model_type=ModelTypesIdsEnum.qda, nodes_from=[base])
    final = NodeGenerator.secondary_node(
        model_type=ModelTypesIdsEnum.knn, nodes_from=[lda_branch, qda_branch])

    train_predicted = final.fit(input_data=train)

    expected_id = (
        '((/n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.lda_defaultparams;;(/'
        'n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.qda_defaultparams;)/'
        'n_ModelTypesIdsEnum.knn_defaultparams'
    )
    assert final.descriptive_id == expected_id

    assert train_predicted.predict.shape == train.target.shape
    assert final.cache.actual_cached_state is not None
def chain_example():
    """Return a three-level example chain.

           XG
        |      \\
       XG      KNN
       |  \\     |  \\
      LR  LDA  LR   LDA

    Nodes are added bottom-up: the two leaves of a subtree, then that
    subtree's own node, and the overall root last.
    """
    chain = Chain()

    tree_root = NodeGenerator.secondary_node(ModelTypesIdsEnum.xgboost)
    left_child = NodeGenerator.secondary_node(ModelTypesIdsEnum.xgboost)
    right_child = NodeGenerator.secondary_node(ModelTypesIdsEnum.knn)

    leaf_models = (ModelTypesIdsEnum.logit, ModelTypesIdsEnum.lda)
    for child in (left_child, right_child):
        for leaf_model in leaf_models:
            leaf = NodeGenerator.primary_node(leaf_model)
            child.nodes_from.append(leaf)
            chain.add_node(leaf)
        chain.add_node(child)
        tree_root.nodes_from.append(child)

    chain.add_node(tree_root)
    return chain
from core.composer.node import NodeGenerator
from core.models.model import *
from benchmark.benchmark_utils import get_scoring_case_data_paths

# NOTE(review): Chain, InputData, ModelTypesIdsEnum and roc_auc are not
# imported explicitly; presumably they arrive via the star import above --
# confirm.

# Load the train and test parts of the scoring case.
train_file_path, test_file_path = get_scoring_case_data_paths()
train_data = InputData.from_csv(train_file_path)
test_data = InputData.from_csv(test_file_path)

training_features, testing_features = train_data.features, test_data.features
training_target, testing_target = train_data.target, test_data.target

# Build a two-level chain: a random-forest node fed by tpot and lda nodes.
chain = Chain()
node0 = NodeGenerator.primary_node(ModelTypesIdsEnum.tpot)
node1 = NodeGenerator.primary_node(ModelTypesIdsEnum.lda)
node2 = NodeGenerator.secondary_node(ModelTypesIdsEnum.rf)

node2.nodes_from.append(node0)
node2.nodes_from.append(node1)

# Parents are added before the node that consumes them.
chain.add_node(node0)
chain.add_node(node1)
chain.add_node(node2)

# Fit on the train part, predict on the test part, and report ROC AUC.
chain.fit(train_data)
results = chain.predict(test_data)

roc_auc_value = roc_auc(y_true=testing_target, y_score=results.predict)
print(roc_auc_value)