def test_chain_hierarchy_fit_correct(data_setup):
    """Fit a four-node logit chain and check its structure and output shape.

    The chain is diamond-shaped: one primary node feeds two secondary nodes,
    and both of them are parents of the final node.
    """
    train, _ = train_test_data_setup(data_setup)

    logit = ModelTypesIdsEnum.logit
    source = NodeGenerator.primary_node(model_type=logit)
    left = NodeGenerator.secondary_node(model_type=logit, nodes_from=[source])
    right = NodeGenerator.secondary_node(model_type=logit, nodes_from=[source])
    top = NodeGenerator.secondary_node(model_type=logit,
                                       nodes_from=[left, right])

    chain = Chain()
    for node in (source, left, right, top):
        chain.add_node(node)

    train_predicted = chain.fit(input_data=train, use_cache=False)

    expected_id = (
        '((/n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.logit_defaultparams;;(/'
        'n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.logit_defaultparams;)/'
        'n_ModelTypesIdsEnum.logit_defaultparams')
    assert chain.root_node.descriptive_id == expected_id
    assert chain.length == 4
    assert chain.depth == 3
    assert train_predicted.predict.shape == train.target.shape
def chain_tuning(nodes_to_tune: str, chain: Chain, train_data: InputData,
                 test_data: InputData, local_iter: int,
                 tuner_iter_num: int = 50) -> (float, list):
    """Tune the chain repeatedly and collect the ROC AUC after every run.

    :param nodes_to_tune: 'primary' selects chain.fine_tune_primary_nodes,
        'root' selects chain.fine_tune_all_nodes
    :param chain: chain to tune; it is refitted on train_data after each tuning
    :param train_data: data passed to the tuner and to chain.fit
    :param test_data: data used for prediction and for the ROC AUC value
    :param local_iter: number of tune / fit / predict repetitions
    :param tuner_iter_num: value passed to the tuner as ``iterations``
    :return: mean ROC AUC over the repetitions and the list of per-run values
    :raises ValueError: if nodes_to_tune is neither 'primary' nor 'root'
    """
    if nodes_to_tune not in ('primary', 'root'):
        raise ValueError(
            'Invalid type of nodes. Nodes must be primary or root')

    if nodes_to_tune == 'primary':
        print('primary_node_tuning')
        tune = chain.fine_tune_primary_nodes
    else:
        # NOTE(review): the 'root' option tunes via fine_tune_all_nodes --
        # confirm that tuning all nodes is what 'root' is meant to do.
        print('root_node_tuning')
        tune = chain.fine_tune_all_nodes

    scores = []
    for run in range(local_iter):
        print(f'current local iteration {run}')

        # Tune, then refit and predict with the tuned chain
        tune(train_data, iterations=tuner_iter_num)
        chain.fit(train_data)
        prediction = chain.predict(test_data)

        scores.append(roc_auc(y_true=test_data.target,
                              y_score=prediction.predict))

    return float(np.mean(scores)), scores
def get_rmse_value(chain: Chain, train_data: InputData,
                   test_data: InputData) -> (float, float):
    """Predict on both datasets with the chain and score each prediction.

    The score is the result of ``mse`` called with ``squared=False``.

    :param chain: chain used for prediction (it is not fitted here)
    :param train_data: first dataset to predict and score
    :param test_data: second dataset to predict and score
    :return: (value for train_data, value for test_data)
    """
    predicted_train = chain.predict(input_data=train_data)
    predicted_test = chain.predict(input_data=test_data)

    # NOTE(review): if `mse` is sklearn's mean_squared_error, the `squared`
    # argument is deprecated in recent releases -- confirm the pinned version.
    test_rmse = mse(y_true=test_data.target,
                    y_pred=predicted_test.predict,
                    squared=False)
    train_rmse = mse(y_true=train_data.target,
                     y_pred=predicted_train.predict,
                     squared=False)

    return train_rmse, test_rmse
def compose_chain(self, data: InputData, initial_chain: Optional[Chain],
                  composer_requirements: ComposerRequirements,
                  metrics: Optional[Callable], optimiser_parameters=None,
                  is_visualise: bool = False) -> Chain:
    """Assemble a fixed-structure chain from the models in the requirements.

    The structure depends on ``self.dummy_chain_type``:

    * hierarchical -- every model from ``composer_requirements.primary``
      becomes a parent of a single secondary node built from
      ``composer_requirements.secondary[0]``;
    * flat -- a primary node built from ``composer_requirements.primary[0]``
      is followed by the secondary models linked one after another.

    ``data``, ``initial_chain``, ``metrics``, ``optimiser_parameters`` and
    ``is_visualise`` are not used by this method.

    :return: the newly built chain
    :raises NotImplementedError: for any other ``dummy_chain_type``
    """
    composed = Chain()

    if self.dummy_chain_type == DummyChainTypeEnum.hierarchical:
        # (y1, y2) -> y
        root = NodeGenerator.secondary_node(composer_requirements.secondary[0])
        for primary_model in composer_requirements.primary:
            parent = NodeGenerator.primary_node(primary_model)
            composed.add_node(parent)
            root.nodes_from.append(parent)
        composed.add_node(root)
        return composed

    if self.dummy_chain_type == DummyChainTypeEnum.flat:
        # (y1) -> (y2) -> y
        tail = NodeGenerator.primary_node(composer_requirements.primary[0])
        composed.add_node(tail)
        for secondary_model in composer_requirements.secondary:
            successor = NodeGenerator.secondary_node(secondary_model)
            successor.nodes_from = [tail]
            tail = successor
            composed.add_node(successor)
        return composed

    raise NotImplementedError()
def metric_for_nodes(self, metric_function, train_data: InputData,
                     test_data: InputData, is_chain_shared: bool,
                     chain: Chain) -> float:
    """Validate the chain, fit it on train_data and score it on test_data.

    When ``is_chain_shared`` is truthy the chain is wrapped into a
    ``SharedChain`` that uses ``self.shared_cache`` before fitting.

    :return: the result of ``metric_function`` called with the fitted chain
        and ``test_data``
    """
    validate(chain)

    assessed = (SharedChain(base_chain=chain, shared_cache=self.shared_cache)
                if is_chain_shared else chain)
    assessed.fit(input_data=train_data)

    return metric_function(assessed, test_data)
def chain_with_cycle():
    """Return a chain of three 'logit' nodes whose links form a cycle.

    ``closing`` takes ``middle`` as a parent, and ``middle`` is then given
    ``closing`` as an additional parent.
    """
    entry = PrimaryNode(model_type='logit')
    middle = SecondaryNode(model_type='logit', nodes_from=[entry])
    closing = SecondaryNode(model_type='logit', nodes_from=[middle, entry])
    # Close the loop: middle -> closing -> middle
    middle.nodes_from.append(closing)

    cyclic = Chain()
    cyclic.add_node(entry)
    cyclic.add_node(middle)
    cyclic.add_node(closing)
    return cyclic
def get_roc_auc_value(chain: Chain, train_data: InputData,
                      test_data: InputData) -> (float, float):
    """Predict on both datasets with the chain and compute ROC AUC for each.

    :param chain: chain used for prediction (it is not fitted here)
    :param train_data: first dataset to predict and score
    :param test_data: second dataset to predict and score
    :return: (ROC AUC for train_data, ROC AUC for test_data)
    """
    predicted_train = chain.predict(input_data=train_data)
    predicted_test = chain.predict(input_data=test_data)

    test_score = roc_auc(y_true=test_data.target,
                         y_score=predicted_test.predict)
    train_score = roc_auc(y_true=train_data.target,
                          y_score=predicted_train.predict)

    return train_score, test_score
def two_level_chain():
    """Return a chain with 'logit' and 'knn' primary nodes under 'xgboost'."""
    logit_node = PrimaryNode(model_type='logit')
    knn_node = PrimaryNode(model_type='knn')
    xgboost_node = SecondaryNode(model_type='xgboost',
                                 nodes_from=[logit_node, knn_node])

    result = Chain()
    result.add_node(logit_node)
    result.add_node(knn_node)
    result.add_node(xgboost_node)
    return result
def get_regr_chain():
    """Return a chain with 'xgbreg' and 'knnreg' parents of a 'linear' node.

    Only the final node is passed to ``Chain.add_node``.
    """
    parents = [PrimaryNode(model_type='xgbreg'),
               PrimaryNode(model_type='knnreg')]
    linear_node = SecondaryNode(model_type='linear', nodes_from=parents)

    regr_chain = Chain()
    regr_chain.add_node(linear_node)
    return regr_chain
def chain_with_multiple_roots():
    """Return a chain where two secondary 'logit' nodes share one parent.

    Neither secondary node is a parent of any other node.
    """
    shared_parent = PrimaryNode(model_type='logit')
    one_root = SecondaryNode(model_type='logit', nodes_from=[shared_parent])
    other_root = SecondaryNode(model_type='logit', nodes_from=[shared_parent])

    result = Chain()
    for member in (shared_parent, one_root, other_root):
        result.add_node(member)
    return result
def get_class_chain():
    """Return a chain with 'xgboost' and 'knn' parents of a 'logit' node.

    Only the final node is passed to ``Chain.add_node``.
    """
    parents = [PrimaryNode(model_type='xgboost'),
               PrimaryNode(model_type='knn')]
    logit_node = SecondaryNode(model_type='logit', nodes_from=parents)

    class_chain = Chain()
    class_chain.add_node(logit_node)
    return class_chain
def chain_third():
    """Return a chain with a 'qda' node that has two 'rf' primary parents."""
    #    QDA
    #    |  \
    #    RF  RF
    chain = Chain()
    new_node = SecondaryNode('qda')
    for model_type in ('rf', 'rf'):
        new_node.nodes_from.append(PrimaryNode(model_type))
    chain.add_node(new_node)
    # add_node is called for its side effect only, so use a plain loop
    # instead of a comprehension that builds a throwaway list.
    for node_from in new_node.nodes_from:
        chain.add_node(node_from)
    return chain
def chain_third():
    """Return a chain with a qda node that has two rf primary parents."""
    #    QDA
    #    |  \
    #    RF  RF
    chain = Chain()
    new_node = NodeGenerator.secondary_node(ModelTypesIdsEnum.qda)
    for model_type in (ModelTypesIdsEnum.rf, ModelTypesIdsEnum.rf):
        new_node.nodes_from.append(NodeGenerator.primary_node(model_type))
    chain.add_node(new_node)
    # add_node is called for its side effect only, so use a plain loop
    # instead of a comprehension that builds a throwaway list.
    for node_from in new_node.nodes_from:
        chain.add_node(node_from)
    return chain
def valid_chain():
    """Return a linear chain of four 'logit' nodes, each fed by the previous."""
    head = PrimaryNode(model_type='logit')
    step_two = SecondaryNode(model_type='logit', nodes_from=[head])
    step_three = SecondaryNode(model_type='logit', nodes_from=[step_two])
    tail = SecondaryNode(model_type='logit', nodes_from=[step_three])

    linear = Chain()
    linear.add_node(head)
    linear.add_node(step_two)
    linear.add_node(step_three)
    linear.add_node(tail)
    return linear
def chain_with_isolated_components():
    """Return a chain made of two node pairs with no link between the pairs.

    The first pair is a primary node and its child; the second pair starts
    from a secondary node created with an empty ``nodes_from`` list.
    """
    # First component: primary -> secondary
    primary = PrimaryNode(model_type='logit')
    primary_child = SecondaryNode(model_type='logit', nodes_from=[primary])

    # Second component: parentless secondary -> secondary
    orphan = SecondaryNode(model_type='logit', nodes_from=[])
    orphan_child = SecondaryNode(model_type='logit', nodes_from=[orphan])

    result = Chain()
    for member in (primary, primary_child, orphan, orphan_child):
        result.add_node(member)
    return result
def chain_with_isolated_nodes():
    """Return a three-node linear 'logit' chain plus one unlinked node.

    The extra secondary node is created with an empty ``nodes_from`` list
    and is not a parent of any other node.
    """
    head = PrimaryNode(model_type='logit')
    middle = SecondaryNode(model_type='logit', nodes_from=[head])
    tail = SecondaryNode(model_type='logit', nodes_from=[middle])
    loner = SecondaryNode(model_type='logit', nodes_from=[])

    result = Chain()
    result.add_node(head)
    result.add_node(middle)
    result.add_node(tail)
    result.add_node(loner)
    return result
def chain_with_multiple_roots():
    """Return a chain where two secondary logit nodes share one parent.

    Neither secondary node is a parent of any other node.
    """
    logit = ModelTypesIdsEnum.logit
    shared_parent = NodeGenerator.primary_node(model_type=logit)
    one_root = NodeGenerator.secondary_node(model_type=logit,
                                            nodes_from=[shared_parent])
    other_root = NodeGenerator.secondary_node(model_type=logit,
                                              nodes_from=[shared_parent])

    result = Chain()
    result.add_node(shared_parent)
    result.add_node(one_root)
    result.add_node(other_root)
    return result
def chain_with_cycle():
    """Return a chain of three logit nodes whose links form a cycle.

    ``closing`` takes ``middle`` as a parent, and ``middle`` is then given
    ``closing`` as an additional parent.
    """
    logit = ModelTypesIdsEnum.logit
    entry = NodeGenerator.primary_node(model_type=logit)
    middle = NodeGenerator.secondary_node(model_type=logit,
                                          nodes_from=[entry])
    closing = NodeGenerator.secondary_node(model_type=logit,
                                           nodes_from=[middle, entry])
    # Close the loop: middle -> closing -> middle
    middle.nodes_from.append(closing)

    cyclic = Chain()
    for member in (entry, middle, closing):
        cyclic.add_node(member)
    return cyclic
def compose_chain() -> Chain:
    """Return a chain with 'svc' and 'lda' primary nodes feeding an 'rf' node.

    Only the 'rf' node is passed to ``Chain.add_node``.
    """
    composed = Chain()

    svc_node = PrimaryNode('svc')
    lda_node = PrimaryNode('lda')
    rf_node = SecondaryNode('rf')
    for parent in (svc_node, lda_node):
        rf_node.nodes_from.append(parent)

    composed.add_node(rf_node)
    return composed
def get_composite_lstm_chain():
    """Return a two-branch chain joined by an 'additive_data_model' node.

    One branch is 'trend_data_model' -> 'lasso', the other is
    'residual_data_model' -> 'ridge'. Only the final node is passed to
    ``Chain.add_node``.
    """
    composite = Chain()

    # Trend branch
    trend_source = PrimaryNode('trend_data_model')
    trend_model = SecondaryNode('lasso', nodes_from=[trend_source])

    # Residual branch
    residual_source = PrimaryNode('residual_data_model')
    residual_model = SecondaryNode('ridge', nodes_from=[residual_source])

    combined = SecondaryNode('additive_data_model',
                             nodes_from=[residual_model, trend_model])
    composite.add_node(combined)
    return composite
def default_valid_chain():
    """Return a diamond-shaped chain of four logit nodes.

    One primary node feeds two secondary nodes, and both of them are parents
    of the final node.
    """
    logit = ModelTypesIdsEnum.logit
    source = NodeGenerator.primary_node(model_type=logit)
    left = NodeGenerator.secondary_node(model_type=logit, nodes_from=[source])
    right = NodeGenerator.secondary_node(model_type=logit, nodes_from=[source])
    top = NodeGenerator.secondary_node(model_type=logit,
                                       nodes_from=[left, right])

    diamond = Chain()
    diamond.add_node(source)
    diamond.add_node(left)
    diamond.add_node(right)
    diamond.add_node(top)
    return diamond
def chain_with_isolated_nodes():
    """Return a three-node linear logit chain plus one unlinked node.

    The extra secondary node is created with an empty ``nodes_from`` list
    and is not a parent of any other node.
    """
    logit = ModelTypesIdsEnum.logit
    head = NodeGenerator.primary_node(model_type=logit)
    middle = NodeGenerator.secondary_node(model_type=logit, nodes_from=[head])
    tail = NodeGenerator.secondary_node(model_type=logit, nodes_from=[middle])
    loner = NodeGenerator.secondary_node(model_type=logit, nodes_from=[])

    result = Chain()
    for member in (head, middle, tail, loner):
        result.add_node(member)
    return result
def test_regression_chain_with_datamodel_fit_correct():
    """Fit a 'lasso' node fed by 'ridge' and 'direct_data_model' nodes.

    The prediction for the test part must have the same shape as its target.
    """
    train_data, test_data = train_test_data_setup(
        get_synthetic_regression_data())

    data_node = PrimaryNode('direct_data_model')
    ridge_node = PrimaryNode('ridge')
    lasso_node = SecondaryNode('lasso')
    lasso_node.nodes_from = [ridge_node, data_node]

    chain = Chain(lasso_node)
    chain.fit(train_data)
    predicted = chain.predict(test_data)

    assert predicted.predict.shape == test_data.target.shape
def get_composite_lstm_chain():
    """Return a two-branch chain joined by an 'additive_data_model' node.

    One branch is 'trend_data_model' -> 'linear', the other is
    'residual_data_model' -> 'linear'. The ``labels`` attribute of the trend
    node and of the final node is set to ``["fixed"]``. Only the final node
    is passed to ``Chain.add_node``.
    """
    chain = Chain()

    node_trend = PrimaryNode('trend_data_model')
    node_trend.labels = ["fixed"]
    node_lstm_trend = SecondaryNode('linear', nodes_from=[node_trend])
    # NOTE(review): an identical second `node_trend.labels = ["fixed"]`
    # assignment used to sit here and was dropped as redundant; confirm it
    # was not meant to label a different node (e.g. node_lstm_trend).

    node_residual = PrimaryNode('residual_data_model')
    node_ridge_residual = SecondaryNode('linear', nodes_from=[node_residual])

    node_final = SecondaryNode(
        'additive_data_model',
        nodes_from=[node_ridge_residual, node_lstm_trend])
    node_final.labels = ["fixed"]

    chain.add_node(node_final)
    return chain
def metric_for_nodes(self, metric_function, train_data: InputData,
                     test_data: InputData, is_chain_shared: bool,
                     chain: Chain) -> float:
    """Validate the chain, fit it on train_data and score it on test_data.

    When ``is_chain_shared`` is truthy the chain is wrapped into a
    ``SharedChain`` that uses ``self.shared_cache`` before fitting.

    Any ``Exception`` raised along the way is printed and ``max_int_value``
    is returned instead of a metric.

    :return: the result of ``metric_function`` called with the fitted chain
        and ``test_data``, or ``max_int_value`` on failure
    """
    try:
        validate(chain)
        assessed = (
            SharedChain(base_chain=chain, shared_cache=self.shared_cache)
            if is_chain_shared else chain)
        assessed.fit(input_data=train_data)
        return metric_function(assessed, test_data)
    except Exception as ex:
        # Best-effort: report the failure and let the caller continue
        print(f'Error in chain assessment during composition: {ex}. Continue.')
        return max_int_value
def calculate_validation_metric(chain: Chain,
                                dataset_to_validate: InputData) -> float:
    """Return the ROC AUC of the chain's prediction on the given dataset.

    :param chain: chain used for prediction (it is not fitted here)
    :param dataset_to_validate: data to predict on; its ``target`` is used
        as the true labels
    """
    # Run the composite model on the validation data
    prediction = chain.predict(dataset_to_validate)

    # Assess the quality of the obtained prediction
    return roc_auc(y_true=dataset_to_validate.target,
                   y_score=prediction.predict)
def apply_model_to_data(model: Chain, data_path: str):
    """Predict with the model on data from data_path and attach the labels.

    The examples are prepared by ``create_multi_clf_examples_from_excel``,
    loaded without a target, and the prediction converted by
    ``probs_to_labels`` is stored in the 'forecast' column.

    :param model: chain used for prediction
    :param data_path: path passed to ``create_multi_clf_examples_from_excel``
    :return: the dataframe returned by the preparation step, with the
        'forecast' column added
    """
    frame, csv_path = create_multi_clf_examples_from_excel(data_path,
                                                           return_df=True)
    unlabeled = InputData.from_csv(csv_path, with_target=False)
    prediction = model.predict(unlabeled)

    frame['forecast'] = probs_to_labels(prediction.predict)
    return frame
def get_value(cls, chain: Chain, reference_data: InputData) -> float:
    """Return ``cls.metric`` for the chain's prediction on reference_data.

    If prediction or metric calculation raises an ``Exception``, the error
    is printed and ``cls.default_value`` is returned instead.
    """
    fallback = cls.default_value
    try:
        prediction = chain.predict(reference_data)
        return cls.metric(reference_data, prediction)
    except Exception as ex:
        # Best-effort: report the failure and fall back to the default
        print(f'Metric evaluation error: {ex}')
    return fallback
def chain_with_incorrect_decomposition_structure():
    """Return a chain where both data-model nodes feed one 'linear' node.

    The 'trend_data_model' and 'residual_data_model' primary nodes are
    direct parents of the same final node.
    """
    trend_source = PrimaryNode(model_type='trend_data_model')
    residual_source = PrimaryNode(model_type='residual_data_model')
    linear_node = SecondaryNode(model_type='linear',
                                nodes_from=[trend_source, residual_source])
    return Chain(linear_node)
def get_simple_chain():
    """Return a chain with 'xgboost' and 'knn' parents of a 'logit' node."""
    xgboost_node = PrimaryNode(model_type='xgboost')
    knn_node = PrimaryNode(model_type='knn')
    logit_node = SecondaryNode(model_type='logit',
                               nodes_from=[xgboost_node, knn_node])
    return Chain(logit_node)