Exemple #1
def pytest_generate_tests(metafunc):
    """Parametrize ``scenario_id`` with the ``TEST_``-prefixed scenarios of the project.

    Test functions that do not request the ``scenario_id`` fixture are left
    untouched. For the others, the DSS instance given by the ``--host`` and
    ``--api`` command-line options is queried for the scenarios of
    ``--project``, and one test case is generated per scenario whose id
    starts with ``TEST_``.

    :param metafunc: the object pytest passes to this hook; only its
        ``fixturenames``, ``config`` and ``parametrize`` members are used.
    """
    if "scenario_id" not in metafunc.fixturenames:
        return

    p_host = metafunc.config.getoption('--host')
    p_api = metafunc.config.getoption('--api')
    p_project = metafunc.config.getoption('--project')
    dataiku.set_remote_dss(p_host, p_api)
    client = dataiku.api_client()
    project = client.get_project(p_project)

    list_scenarios = []
    for scenario in project.list_scenarios():
        if scenario["id"].startswith("TEST_"):
            print("Adding scenario to test :", scenario["id"])
            # Actually register the scenario: without this append the list
            # handed to parametrize() is always empty and no scenario is run.
            list_scenarios.append(scenario["id"])
    metafunc.parametrize("scenario_id", list_scenarios)
Exemple #2
def test_run_scenario(params, scenario_id):
    """Run one scenario, wait for it to finish and assert that it succeeded.

    :param params: mapping providing the ``"host"``, ``"api"`` and
        ``"project"`` entries used to reach the DSS project
    :param scenario_id: id of the scenario to execute
    """
    print("Executing scenario ", scenario_id)
    dataiku.set_remote_dss(params["host"], params["api"])
    dss_project = dataiku.api_client().get_project(params["project"])

    # Blocking call: returns once the scenario run is over.
    run = dss_project.get_scenario(scenario_id).run_and_wait()
    print("Scenario info: ", run.get_info())
    print("Scenario duration: ", run.get_duration())

    outcome = run.get_details()["scenarioRun"]["result"]["outcome"]
    assert outcome == "SUCCESS"
Exemple #3
def test_scenario(params):
    """Assert that the project defines the scenarios the test pipeline relies on.

    Two conditions are checked against the ids returned by
    ``project.list_scenarios()``:

    * at least one id starts with ``TEST_``;
    * one id is exactly ``TEST_SMOKE``.

    :param params: mapping providing the ``"host"``, ``"api"`` and
        ``"project"`` entries used to reach the DSS project
    """
    dataiku.set_remote_dss(params["host"], params["api"])
    client = dataiku.api_client()
    project = client.get_project(params["project"])

    scenario_ids = [scenario["id"] for scenario in project.list_scenarios()]

    # The prefix includes the underscore so that this check agrees with its
    # own failure message (an id such as "TESTING" must not satisfy it).
    test_scenario = any(sid.startswith("TEST_") for sid in scenario_ids)
    smoketest_scenario = "TEST_SMOKE" in scenario_ids

    assert test_scenario, "You need at least one test scenario (name starts with 'TEST_')"
    assert smoketest_scenario, "You need at least one smoke test scenario (name 'TEST_SMOKE')"
Exemple #4
def test_coding_recipes_complexity(params):
    """Check the size and complexity of every Python recipe of the project.

    For each recipe whose type is ``"python"`` the recipe code is analysed and
    the test fails when any of these limits is reached:

    * ``loc`` of 100 or more;
    * ``lloc`` of 50 or more;
    * complexity of 21 or more.

    :param params: mapping providing the ``"host"``, ``"api"`` and
        ``"project"`` entries used to reach the DSS project
    """
    dataiku.set_remote_dss(params["host"], params["api"])
    client = dataiku.api_client()
    project = client.get_project(params["project"])

    for recipe in project.list_recipes():
        if recipe["type"] != "python":
            continue

        # NOTE(review): the original statement was cut off right after
        # ``project.get_recipe(``. The call chain below is a reconstruction
        # from the Dataiku public API - confirm against the DSS version in use.
        payload = project.get_recipe(
            recipe["name"]).get_definition_and_payload().get_payload()

        # cc_raw / cc_visitors are module aliases imported elsewhere
        # (presumably radon.raw and radon.visitors - TODO confirm).
        code_analysis = cc_raw.analyze(payload)
        assert code_analysis.loc < 100
        assert code_analysis.lloc < 50

        v = cc_visitors.ComplexityVisitor.from_code(payload)
        # format() instead of "+": concatenating the numeric complexity to a
        # str raised TypeError and hid the real assertion failure.
        assert v.complexity < 21, (
            "Code complexity of recipe {} is too complex: {} > max value (21)"
            .format(recipe["name"], v.complexity))
import dataiku
import sys
import os

# Positional command-line arguments, in order: DSS host, API key, project key
# and bundle file name without its ".zip" extension.
host = sys.argv[1]
apiKey = sys.argv[2]
project = sys.argv[3]
bundle_file = sys.argv[4] + '.zip'

dataiku.set_remote_dss(host, apiKey)
client = dataiku.api_client()

# Import bundle
if not os.path.exists(bundle_file):
    print("Bundle file named ", bundle_file, " does not exist, cancelling")
    # Really cancel: previously execution fell through to open() below and
    # died with an unrelated FileNotFoundError traceback.
    sys.exit(1)
# NOTE(review): this stream is not closed in the visible part of the script;
# close it once the bundle has been consumed.
bundle_file_stream = open(bundle_file, 'rb')

if project in client.list_project_keys():
    # A single lookup is enough; the original repeated this exact statement.
    test_project = client.get_project(project)
import dataiku
import json
# set_remote_dss() is required for scripting, i.e. when run from a shell.
dataiku.set_remote_dss("http://localhost:12000/", "key_placeholder")
client = dataiku.api_client()

# Parameter set for the "eng" team.
# NOTE(review): the keys mention HDFS and Hive, so this is presumably the
# params payload of an HDFS connection - confirm against where it is used.
# The original literal never closed 'namingRule' nor the outer dict; 'root'
# is indented as a top-level key, so it is kept at the top level here.
eng_params = {
    'aclSynchronizationMode': 'NONE',
    'clearMode': 'DSS_USER',
    'customPropertiesProviderParams': [],
    'dkuProperties': [],
    'extraConf': [],
    'hiveSynchronizationMode': 'KEEP_IN_SYNC',
    'namingRule': {
        'hdfsPathDatasetNamePrefix': '${projectKey}/',
        'hiveDatabaseName': 'eng_db',
        'tableNameDatasetNamePrefix': '${projectKey}_',
        'uploadsPathPrefix': 'uploads',
    },
    'root': '/data/eng',
}

datateam_params = {
    'aclSynchronizationMode': 'NONE',
    'clearMode': 'DSS_USER',
    'customPropertiesProviderParams': [],
    'dkuProperties': [],
    'extraConf': [],
    'hiveSynchronizationMode': 'KEEP_IN_SYNC',
Exemple #7
    def predict(self, features_df):
        """
        The main prediction method.

        :param features_df: a dataframe of 1 or several records to predict

        :return: Either:
            ``decision_series`` or
            ``(decision_series, proba_df)`` or
            ``(decision_series, proba_df, custom_keys_list)``

        decision_series must be a Pandas Series of decisions

        proba_df is optional and must contain one column per class

        custom_keys_list is optional and must contain one entry per input row. Each entry of
        custom_keys_list must be a Python dictionary. These custom keys will be sent in the
        output result

        decision_series, proba_df and custom_keys_list must have the same number of rows as
        features_df.

        This implementation returns ``(decision_series, proba_df)``.
        """
        df = features_df
        data_folder = dataiku.Folder('MzP4vBYB', project_key='PHDATAEMOTION')

        # Decode each base64 video and push it to the folder under a
        # per-row file name, recording that name in the 'fname' column.
        for ind, row in df.iterrows():
            fname = 'ind_{}.mp4'.format(ind)

            df.loc[ind, 'fname'] = fname
            # NOTE(review): the head of this call was missing from the source
            # (only its argument line survived). ``upload_data`` on the folder
            # is a reconstruction - confirm against the original code.
            data_folder.upload_data(
                fname, base64.urlsafe_b64decode(row.b64_video.encode('utf-8')))

        client = dataiku.api_client()
        project = client.get_project('PHDATAEMOTION')
        scenario = project.get_scenario('PRODSCOREVIDEOS')

        scenario_run = scenario.run_and_wait()
        # NOTE(review): the outcome is computed but never checked below, so a
        # failed scenario run does not stop the prediction.
        success = scenario_run.get_info()['result']['outcome'] == 'SUCCESS'

        ds = project.get_dataset('ProdScoredVideos')
        # NOTE(review): as written this frame only carries the dataset's
        # column names and no rows; the code that loads the scored rows is
        # not visible here - verify before relying on the joined values.
        scores = pd.DataFrame(
            columns=[c['name'] for c in ds.get_schema()['columns']])
        scores = scores.set_index('video_path')

        # NOTE(review): the original list literal was never closed, so
        # trailing entries may have been lost - confirm the full class list.
        emotions = [
            'calm', 'sad', 'surprised', 'neutral', 'fearful', 'angry', 'happy',
        ]

        df = df.drop(columns=['b64_video']).join(scores, on='fname')

        for ind, row in df.iterrows():
            max_val = 0
            max_label = None
            sum_val = 0

            # Pick the emotion with the highest average prediction and
            # accumulate the total used for normalisation below.
            for e in emotions:
                p = row['prediction_{}_avg'.format(e)]
                sum_val += p
                if p > max_val:
                    max_val = p
                    max_label = e

            df.loc[ind, 'prediction'] = max_label

            # Normalise the per-emotion averages by their sum.
            for e in emotions:
                df.loc[ind, 'proba_{}'.format(e)] = row[
                    'prediction_{}_avg'.format(e)] / sum_val

        decisions = df.prediction
        proba_df = df[['proba_{}'.format(e) for e in emotions]]

        return (decisions, proba_df)