def run_chain_from_automl(train_file_path: str, test_file_path: str,
                          max_run_time: timedelta = timedelta(minutes=10)):
    """Fit a chain containing a 'tpot' node and report ROC AUC on test data.

    The chain has two primary nodes ('tpot' and 'lda') feeding one
    secondary 'rf' node. It is fitted on the training CSV and evaluated
    on the testing CSV.

    :param train_file_path: path to the CSV file with training data
    :param test_file_path: path to the CSV file with testing data
    :param max_run_time: time budget handed to the 'tpot' node through its
        'max_run_time_sec' parameter (whole seconds)
    :return: ROC AUC value computed by ``roc_auc`` on the test predictions
    """
    train_data = InputData.from_csv(train_file_path)
    test_data = InputData.from_csv(test_file_path)

    testing_target = test_data.target

    chain = Chain()
    node_tpot = PrimaryNode('tpot')

    # ``timedelta.seconds`` is only the seconds *component* (0..86399) and
    # silently drops whole days; ``total_seconds()`` covers the full duration.
    node_tpot.model.params = {
        'max_run_time_sec': int(max_run_time.total_seconds())
    }

    node_lda = PrimaryNode('lda')
    node_rf = SecondaryNode('rf')

    node_rf.nodes_from = [node_tpot, node_lda]

    chain.add_node(node_rf)

    chain.fit(train_data)
    results = chain.predict(test_data)

    roc_auc_value = roc_auc(y_true=testing_target,
                            y_score=results.predict)
    print(roc_auc_value)

    return roc_auc_value
def test_regression_chain_with_datamodel_fit_correct():
    """Check prediction shape for a regression chain with a data-model node.

    A 'lasso' secondary node receives input from a 'ridge' primary node and
    a 'direct_data_model' primary node; after fitting, the prediction shape
    must equal the shape of the test target.
    """
    dataset = get_synthetic_regression_data()
    train_part, test_part = train_test_data_setup(dataset)

    # Primary nodes: the direct data model and the ridge model.
    data_node = PrimaryNode('direct_data_model')
    ridge_node = PrimaryNode('ridge')

    # The lasso node consumes both primary outputs.
    lasso_node = SecondaryNode('lasso')
    lasso_node.nodes_from = [ridge_node, data_node]

    regression_chain = Chain(lasso_node)
    regression_chain.fit(train_part)

    output = regression_chain.predict(test_part)

    predicted_shape = output.predict.shape
    expected_shape = test_part.target.shape
    assert predicted_shape == expected_shape
def test_chain_with_datamodel_fit_correct(data_setup):
    """Check label shape for a chain that includes a data-model node.

    An 'rf' secondary node receives input from a 'bernb' primary node and a
    'direct_data_model' primary node. The predictions are converted with
    ``probs_to_labels`` and their shape must equal the test target's shape.
    """
    train_part, test_part = train_test_data_setup(data_setup)

    model_chain = Chain()

    data_node = PrimaryNode('direct_data_model')
    bernb_node = PrimaryNode('bernb')
    rf_node = SecondaryNode('rf')
    rf_node.nodes_from = [bernb_node, data_node]

    # Register the nodes in the same order as before: data, bernb, rf.
    for node in (data_node, bernb_node, rf_node):
        model_chain.add_node(node)

    model_chain.fit(train_part)

    raw_prediction = model_chain.predict(test_part).predict
    labels = probs_to_labels(raw_prediction)
    results = np.asarray(labels)

    assert results.shape == test_part.target.shape