import dataiku


def pytest_generate_tests(metafunc):
    if "scenario_id" in metafunc.fixturenames:
        p_host = metafunc.config.getoption('--host')
        p_api = metafunc.config.getoption('--api')
        p_project = metafunc.config.getoption('--project')
        dataiku.set_remote_dss(p_host, p_api)
        client = dataiku.api_client()
        project = client.get_project(p_project)
        # Collect every scenario whose id starts with TEST_ and parametrize the tests with them
        list_scenarios = []
        for scenario in project.list_scenarios():
            if scenario["id"].startswith("TEST_"):
                print("Adding scenario to test:", scenario["id"])
                list_scenarios.append(scenario["id"])
        metafunc.parametrize("scenario_id", list_scenarios)
def test_run_scenario(params, scenario_id):
    print("*************************")
    print("Executing scenario ", scenario_id)
    dataiku.set_remote_dss(params["host"], params["api"])
    client = dataiku.api_client()
    project = client.get_project(params["project"])
    scenario_result = project.get_scenario(scenario_id).run_and_wait()
    # scenario_result = project.get_scenario(scenario_id).get_last_runs(limit=1)[0]
    print("Scenario info: ", scenario_result.get_info())
    print("Scenario duration: ", scenario_result.get_duration())
    print(scenario_result.get_details()["scenarioRun"]["result"])
    print(scenario_result.get_details()["scenarioRun"]["result"]["outcome"])
    assert scenario_result.get_details()["scenarioRun"]["result"]["outcome"] == "SUCCESS"
def test_scenario(params):
    dataiku.set_remote_dss(params["host"], params["api"])
    client = dataiku.api_client()
    project = client.get_project(params["project"])
    # Check that there is at least one TEST_XXXXX scenario and one TEST_SMOKE scenario
    scenarios = project.list_scenarios()
    test_scenario = False
    smoketest_scenario = False
    for scenario in scenarios:
        if scenario["id"].startswith("TEST_"):
            test_scenario = True
        if scenario["id"] == "TEST_SMOKE":
            smoketest_scenario = True
    assert test_scenario, "You need at least one test scenario (name starts with 'TEST_')"
    assert smoketest_scenario, "You need at least one smoke test scenario (named 'TEST_SMOKE')"
# radon is assumed to be installed; the aliases match the cc_raw / cc_visitors names used below
import radon.raw as cc_raw
import radon.visitors as cc_visitors


def test_coding_recipes_complexity(params):
    dataiku.set_remote_dss(params["host"], params["api"])
    client = dataiku.api_client()
    project = client.get_project(params["project"])
    recipes = project.list_recipes()
    for recipe in recipes:
        if recipe["type"] == "python":
            print(recipe)
            payload = project.get_recipe(recipe["name"]).get_definition_and_payload().get_payload()
            # Raw metrics: lines of code and logical lines of code
            code_analysis = cc_raw.analyze(payload)
            print(code_analysis)
            assert code_analysis.loc < 100
            assert code_analysis.lloc < 50
            # Cyclomatic complexity of the recipe code
            v = cc_visitors.ComplexityVisitor.from_code(payload)
            assert v.complexity < 21, ("Code complexity of recipe " + recipe["name"]
                                       + " is too high: " + str(v.complexity) + " >= max value (21)")
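# ---------------------------------------------------------------------------
# Hypothetical conftest.py sketch (not part of the original snippets).
# The tests above read --host, --api and --project through
# metafunc.config.getoption() and consume a "params" fixture; only the option
# names come from that code, the fixture shape below is an assumption.
# ---------------------------------------------------------------------------
import pytest


def pytest_addoption(parser):
    parser.addoption("--host", action="store", help="URL of the target DSS instance")
    parser.addoption("--api", action="store", help="DSS API key")
    parser.addoption("--project", action="store", help="DSS project key")


@pytest.fixture
def params(request):
    # Assumed shape: the tests index params["host"], params["api"] and params["project"]
    return {
        "host": request.config.getoption("--host"),
        "api": request.config.getoption("--api"),
        "project": request.config.getoption("--project"),
    }

# Example invocation (values are placeholders):
#   pytest -v --host=http://localhost:12000 --api=<API_KEY> --project=<PROJECT_KEY>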
import dataiku
import sys
import os

host = sys.argv[1]
apiKey = sys.argv[2]
project = sys.argv[3]
bundle_file = sys.argv[4] + '.zip'

dataiku.set_remote_dss(host, apiKey)
client = dataiku.api_client()

# Import bundle
if not os.path.exists(bundle_file):
    print("Bundle file named ", bundle_file, " does not exist, cancelling")
    sys.exit(1)

bundle_file_stream = open(bundle_file, 'rb')

if project in client.list_project_keys():
    # Project already exists: import the bundle into it
    test_project = client.get_project(project)
    test_project.import_bundle_from_stream(bundle_file_stream)
else:
    # Otherwise create the project from the bundle archive
    client.create_project_from_bundle_archive(bundle_file_stream)
    test_project = client.get_project(project)
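# Example invocation of the bundle import script above (the script file name and
# the argument values are placeholders, not from the original):
#   python import_bundle.py http://localhost:12000 <API_KEY> <PROJECT_KEY> <BUNDLE_NAME>
# sys.argv[4] is the bundle name without the '.zip' extension, which the script appends.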
import dataiku
import json

# set_remote_dss() is required for scripting, i.e. when run from a shell
dataiku.set_remote_dss("http://localhost:12000/", "key_placeholder")
client = dataiku.api_client()

eng_params = {
    'aclSynchronizationMode': 'NONE',
    'clearMode': 'DSS_USER',
    'customPropertiesProviderParams': [],
    'dkuProperties': [],
    'extraConf': [],
    'hiveSynchronizationMode': 'KEEP_IN_SYNC',
    'namingRule': {
        'hdfsPathDatasetNamePrefix': '${projectKey}/',
        'hiveDatabaseName': 'eng_db',
        'tableNameDatasetNamePrefix': '${projectKey}_',
        'uploadsPathPrefix': 'uploads'
    },
    'overridePreCreateManagedDatasetFolderBeforeMetastoreSyncForRecipes': False,
    'root': '/data/eng'
}

datateam_params = {
    'aclSynchronizationMode': 'NONE',
    'clearMode': 'DSS_USER',
    'customPropertiesProviderParams': [],
    'dkuProperties': [],
    'extraConf': [],
    'hiveSynchronizationMode': 'KEEP_IN_SYNC',
# Assumes module-level imports: dataiku, base64, pandas as pd
def predict(self, features_df):
    """
    The main prediction method.

    :param features_df: a dataframe of 1 or several records to predict
    :return: Either:
        ``decision_series`` or
        ``(decision_series, proba_df)`` or
        ``(decision_series, proba_df, custom_keys_list)``

        decision_series must be a Pandas Series of decisions.
        proba_df is optional and must contain one column per class.
        custom_keys_list is optional and must contain one entry per input row.
        Each entry of custom_keys_list must be a Python dictionary.
        These custom keys will be sent in the output result.

        decision_series, proba_df and custom_keys_list must have the same
        number of rows as features_df.
    """
    dataiku.set_remote_dss('http://52.71.116.91:80', 'hTeQ6RbM8u4ASGMthBChKxDfniusNoZu')
    df = features_df

    # Upload each record's base64-encoded video to a DSS managed folder
    data_folder = dataiku.Folder('MzP4vBYB', project_key='PHDATAEMOTION')
    for ind, row in df.iterrows():
        fname = 'ind_{}.mp4'.format(ind)
        df.loc[ind, 'fname'] = fname
        data_folder.upload_data(
            fname, base64.urlsafe_b64decode(row.b64_video.encode('utf-8')))

    # Run the scoring scenario and wait for it to finish
    client = dataiku.api_client()
    project = client.get_project('PHDATAEMOTION')
    scenario = project.get_scenario('PRODSCOREVIDEOS')
    scenario_run = scenario.run_and_wait()
    success = scenario_run.get_info()['result']['outcome'] == 'SUCCESS'

    # Fetch the scored rows back from the output dataset
    ds = project.get_dataset('ProdScoredVideos')
    scores = pd.DataFrame(
        data=list(ds.iter_rows()),
        columns=[c['name'] for c in ds.get_schema()['columns']])
    scores = scores.set_index('video_path')

    emotions = [
        'calm', 'sad', 'surprised', 'neutral', 'fearful', 'angry', 'happy',
        'disgust'
    ]
    df = df.drop(columns=['b64_video']).join(scores, on='fname')

    # Pick the most likely emotion per row and normalize the probabilities
    for ind, row in df.iterrows():
        max_val = 0
        max_label = None
        sum_val = 0
        for e in emotions:
            p = row['prediction_{}_avg'.format(e)]
            sum_val += p
            if p > max_val:
                max_val = p
                max_label = e
        df.loc[ind, 'prediction'] = max_label
        for e in emotions:
            df.loc[ind, 'proba_{}'.format(e)] = row[
                'prediction_{}_avg'.format(e)] / sum_val

    decisions = df.prediction
    proba_df = df[['proba_{}'.format(e) for e in emotions]]
    return (decisions, proba_df)