def __call__(self, func, *args, **kwargs):
    """Apply ``func`` to the wrapped data and return a new ``DataProcessor``.

    Two kinds of ``func`` are handled:

    * a ``FilterInterface`` instance: ``func.apply(self._data)`` is evaluated
      and its result is used as a ``.loc`` indexer on the wrapped data
      (``*args`` / ``**kwargs`` are not used on this path);
    * anything else: it is forwarded, together with ``*args`` and
      ``**kwargs``, to ``apply_func`` along with the wrapped data.

    Raises:
        TypeError: if the non-filter path yields a value that is not an
            instance of the wrapped data's type.
    """
    # Filter path: select from the data using the filter's own output.
    if isinstance(func, FilterInterface):
        return DataProcessor(self._data.loc[func.apply(self._data)])

    # Generic path: the result must stay the same type as the wrapped data
    # so that it can be wrapped again.
    ret_value = apply_func(self._data, func, *args, **kwargs)
    if isinstance(ret_value, type(self._data)):
        return DataProcessor(ret_value)

    raise TypeError(
        f'Call to DataProcessor should return type {type(self._data)} but returned {type(ret_value)}')
"id": row.get(id_labels_column), "text": row.get(text_column), "lang": row.get(lang_column), "concept_id": row.get(concept_id_column), "precision": row.get(precision_column) }]).dict() def train_model(client): client.model.train(model_id) return str(client.model.wait_training(model_id)) cl_client = get_client(get_recipe_config()) init_model(cl_client) apply_func(upload_concepts, client=cl_client, input_dataset="concepts_input_dataset", output_dataset="concepts_output_dataset") apply_func(upload_labels, client=cl_client, input_dataset="labels_input_dataset", output_dataset="labels_output_dataset") train_model(cl_client)
for k, v in row.items() if k.startswith(variation_prefix) } return client.faq.create_questions(model_id, questions=[{ "id": row.get(questions_id_column), "variations": variations, "answer_id": row.get(answer_id_column) }]).dict() def train_model(client): client.model.train(model_id) return str(client.model.wait_training(model_id)) cl_client = get_client(get_recipe_config()) init_model(cl_client) apply_func(upload_answers, client=cl_client, input_dataset="answers_input_dataset", output_dataset="answers_output_dataset") apply_func(upload_questions, client=cl_client, input_dataset="questions_input_dataset", output_dataset="questions_output_dataset") train_model(cl_client)
from dataiku.customrecipe import get_recipe_config
from utils import apply_func

# Name of the row field holding the URL, read once from the recipe
# configuration under the "url_column" key (None when the key is absent).
url_column = get_recipe_config().get("url_column")


def call_api(client, row):
    """Run article extraction on the URL stored in ``row``.

    Args:
        client: object exposing ``html.extract_article``.
        row: mapping-like object; the URL is looked up with
            ``row.get(url_column)``.

    Returns:
        The ``.dict()`` form of the value returned by
        ``client.html.extract_article``.
    """
    url = row.get(url_column)
    article = client.html.extract_article(url)
    return article.dict()


# ``apply_func`` comes from ``utils``; presumably it supplies the client and
# invokes ``call_api`` for each input row -- confirm against utils.
apply_func(call_api)
except ModelNotFound: client.model.create(model_id, "clf") def call_api(client, row): return client.classifier.create_documents(model_id, documents=[{ "text": row.get(text_column), "lang": row.get(lang_column), "class_id": row.get(class_id_column), "id": row.get(id_column) }]).dict() def train_model(client): client.model.train(model_id, model_type=model_type, model_config={"train_ratio": train_ratio}) return str(cl_client.model.wait_training(model_id)) cl_client = get_client(get_recipe_config()) init_model(cl_client) apply_func(call_api, client=cl_client) train_model(cl_client)