def asterDo():
    # Recipe inputs
    main_input_name = get_input_names_for_role('main')[0]
    input_dataset = dataiku.Dataset(main_input_name)

    # Recipe outputs
    main_output_name = get_output_names_for_role('main')[0]
    output_dataset = dataiku.Dataset(main_output_name)

    # Recipe function param
    dss_function = get_recipe_config().get('function', None)

    # Dataiku DSS params
    client = dataiku.api_client()
    projectkey = main_input_name.split('.')[0]
    project = client.get_project(projectkey)

    # Output dataset
    try:
        outputTable = outputtableinfo(
            output_dataset.get_location_info()['info'], main_output_name,
            get_recipe_config() or {})
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings for output table."""
            """ Make sure the connection setting is set to 'Read a database table'."""
            """ This plugin only supports Aster tables.""")

    # Input datasets
    try:
        main_input_names = get_input_names_for_role('main')
        inputTables = []
        for inputname in main_input_names:
            inconnectioninfo = dataiku.Dataset(
                inputname).get_location_info()['info']
            inTable = inputtableinfo(inconnectioninfo, inputname,
                                     dss_function)
            inputTables.append(inTable)
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings from one of the input tables."""
            """ Make sure the connection setting is set to 'Read a database table'."""
            """ This plugin only supports Aster tables.""")

    # Build and run the actual function query.
    query = getFunctionsQuery(dss_function, inputTables, outputTable)
    print('\n'.join(query))
    executor = SQLExecutor2(dataset=input_dataset)

    if dss_function.get('dropIfExists', False):
        # The DROP statements are passed as pre_queries so they run before
        # the closing END TRANSACTION.
        dropAllQuery = getDropOutputTableArgumentsStatements(
            dss_function.get('arguments', []))
        executor.query_to_df('END TRANSACTION;', dropAllQuery)
    executor.query_to_df("END TRANSACTION;", pre_queries=query)

    # Write the output table schema: sample one row and register every
    # column as a string.
    nQuery = '''SELECT * FROM {} LIMIT (1);'''.format(outputTable.tablename)
    selectResult = executor.query_to_df(nQuery)
    output_schema = []
    for column in selectResult.columns:
        output_schema.append({"name": column, "type": "string"})
    output_dataset.write_schema(output_schema)
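# A note on the schema step above: the recipe deliberately types every output
# column as a string instead of mapping Aster types to DSS types; DSS
# re-infers types when the dataset is next read. A minimal sketch of the same
# pattern in isolation (the function name is hypothetical; `executor`,
# `output_dataset`, and `tablename` stand in for the objects built in
# asterDo()):
def write_all_string_schema(executor, output_dataset, tablename):
    # Sample a single row purely to learn the column names.
    sample = executor.query_to_df(
        'SELECT * FROM {} LIMIT 1;'.format(tablename))
    # Register every column as a string.
    output_dataset.write_schema(
        [{"name": col, "type": "string"} for col in sample.columns])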
def testCanopy(self):
    inputConnectionConfig = {
        'table': 'computers_train1',
        'schema': 'public'
    }
    outputConnectionConfig = {'table': 'canopy_out', 'schema': 'dss'}
    functionInputTable = inputtableinfo(inputConnectionConfig,
                                        'computers_train1_public',
                                        canopyConfig)
    functionOutputTable = outputtableinfo(outputConnectionConfig,
                                          'canopy_out', canopyConfig)
    actualquery = getFunctionsQuery(canopyConfig, [functionInputTable],
                                    functionOutputTable)
    expectedquery = '''CREATE DIMENSION TABLE "dss.canopy_out" AS
SELECT * FROM CANOPY (
ON (SELECT 1) PARTITION BY 1
INPUTTABLE('computers_train1')
LOOSEDISTANCE('1000')
TIGHTDISTANCE('500')
);'''
    self.assertEqual(actualquery[2], expectedquery,
                     "test TABLE_NAME argument datatype")
def testEvaluateNamedEntityFinderPartition(self):
    testInputConnectionConfig = {
        'table': 'textclassifier_input',
        'schema': 'dss'
    }
    testOutputConnectionConfig = {'table': 'dss_test'}
    testfunction = dssFunction()
    testConfig = testfunction.name(
        'EVALUATENAMEDENTITYFINDERPARTITION').unaliased_inputs_count(
            1).add_unaliased_input('textclassifier_input',
                                   'PartitionByOne').build()
    functionInputTable = inputtableinfo(testInputConnectionConfig,
                                        'textclassifier_input', testConfig)
    functionOutputTable = outputtableinfo(testOutputConnectionConfig,
                                          'dss_test', testConfig)
    actualquery = getFunctionsQuery(testConfig, [functionInputTable],
                                    functionOutputTable)
    expectedquery = """BEGIN TRANSACTION;
DROP TABLE IF EXISTS "public.dss_test";
CREATE DIMENSION TABLE "public.dss_test" AS
SELECT * FROM EVALUATENAMEDENTITYFINDERPARTITION (
ON dss.textclassifier_input PARTITION BY 1
);
COMMIT;
END TRANSACTION;"""
    self.assertEqual('\n'.join(actualquery + ['END TRANSACTION;']),
                     expectedquery, 'EvaluateNamedEntityFinderPartition')
def testNertrainer(self):
    testInputConnectionConfig = {
        'table': 'ner_sports_train',
        'schema': 'dss'
    }
    testOutputConnectionConfig = {
        'table': 'dss_nertrainer',
        'schema': 'dss'
    }
    functionInputTable = inputtableinfo(testInputConnectionConfig,
                                        'ner_sports_train',
                                        nertrainerConfig)
    functionOutputTable = outputtableinfo(testOutputConnectionConfig,
                                          'dss_nertrainer',
                                          nertrainerConfig)
    actualquery = getFunctionsQuery(nertrainerConfig, [functionInputTable],
                                    functionOutputTable)
    expectedquery = """BEGIN TRANSACTION;
DROP TABLE IF EXISTS "dss.dss_nertrainer";
CREATE DIMENSION TABLE "dss.dss_nertrainer" AS
SELECT * FROM NERTRAINER (
ON dss.ner_sports_train PARTITION BY id
TEXTCOLUMN('content')
MODELFILE('ner_model.bin')
MAXITERATION('1000')
FEATURETEMPLATE('template_1.txt')
);
COMMIT;
END TRANSACTION;"""
    self.assertEqual('\n'.join(actualquery) + '\nEND TRANSACTION;',
                     expectedquery, 'Nertrainer test INTEGER argument')
def testTextParser(self):
    testInputConnectionConfig = {
        'table': 'textparser_input',
        'schema': 'dss'
    }
    testOutputConnectionConfig = {
        'table': 'textparser_output',
        'schema': 'dss'
    }
    functionInputTable = inputtableinfo(testInputConnectionConfig,
                                        'textparser_input',
                                        textParserConfig)
    functionOutputTable = outputtableinfo(testOutputConnectionConfig,
                                          'textparser_output',
                                          textParserConfig)
    actualquery = getFunctionsQuery(textParserConfig, [functionInputTable],
                                    functionOutputTable)
    expectedquery = """CREATE DIMENSION TABLE "dss.textparser_output" AS
SELECT * FROM TEXT_PARSER (
ON dss.textparser_input PARTITION BY ANY
TEXT_COLUMN('text_data')
CASE_INSENSITIVE('True')
OUTPUT_BY_WORD('True')
REMOVE_STOP_WORDS('True')
STOP_WORDS('stopwords.txt')
PUNCTUATION('\[.,?\!\]')
LIST_POSITIONS('True')
ACCUMULATE('doc_id', 'category')
);"""
    self.assertEqual(actualquery[2], expectedquery,
                     'test boolean arguments')
def testTextTokenizer(self):
    textTokenizerOutputConnectionConfig = {'table': 'text_tokenized'}
    textTokenizerInputConnectionConfig = {
        'table': 'text_chunked',
        'schema': 'bs186029'
    }
    functionInputTable = inputtableinfo(textTokenizerInputConnectionConfig,
                                        'text_chunked',
                                        textTokenizerConfig)
    functionOutputTable = outputtableinfo(
        textTokenizerOutputConnectionConfig, 'text_tokenized',
        textTokenizerConfig)
    actualquery = getFunctionsQuery(textTokenizerConfig,
                                    [functionInputTable],
                                    functionOutputTable)
    expectedquery = """BEGIN TRANSACTION;
DROP TABLE IF EXISTS "public.text_tokenized";
CREATE DIMENSION TABLE "public.text_tokenized" AS
SELECT * FROM TEXTTOKENIZER (
ON bs186029.text_chunked PARTITION BY ANY
TEXTCOLUMN('chunk')
);
COMMIT;
END TRANSACTION;"""
    self.assertEqual('\n'.join(actualquery) + '\nEND TRANSACTION;',
                     expectedquery, "TextTokenizer")
def testNer(self):
    testInputConnectionConfig = {
        'table': 'ner_sports_test',
        'schema': 'dss'
    }
    testRuleConnectionConfig = {'table': 'rule_table1', 'schema': 'dss'}
    testOutputConnectionConfig = {'table': 'dss_ner', 'schema': 'dss'}
    functionInputTable = inputtableinfo(testInputConnectionConfig,
                                        'ner_sports_test', nerConfig)
    ruleTable = inputtableinfo(testRuleConnectionConfig, 'rule_table',
                               nerConfig)
    functionOutputTable = outputtableinfo(testOutputConnectionConfig,
                                          'dss_ner', nerConfig)
    actualquery = getFunctionsQuery(nerConfig,
                                    [functionInputTable, ruleTable],
                                    functionOutputTable)
    expectedquery = """BEGIN TRANSACTION;
DROP TABLE IF EXISTS "dss.dss_ner";
CREATE DIMENSION TABLE "dss.dss_ner" AS
SELECT * FROM NER (
ON dss.ner_sports_test PARTITION BY ANY
ON dss.rule_table1 AS "rules" DIMENSION
TEXTCOLUMN('content')
MODELS('ner_model.bin')
ACCUMULATE('id')
SHOWCONTEXT('2')
);
COMMIT;
END TRANSACTION;"""
    self.assertEqual('\n'.join(actualquery) + '\nEND TRANSACTION;',
                     expectedquery, 'Ner')
def testNerEvaluator(self):
    testInputConnectionConfig = {
        'table': 'ner_sports_test',
        'schema': 'dss'
    }
    testOutputConnectionConfig = {
        'table': 'dss_nerevaluator',
        'schema': 'dss'
    }
    functionInputTable = inputtableinfo(testInputConnectionConfig,
                                        'ner_sports_test',
                                        nerEvaluatorConfig)
    functionOutputTable = outputtableinfo(testOutputConnectionConfig,
                                          'dss_ner', nerEvaluatorConfig)
    actualquery = getFunctionsQuery(nerEvaluatorConfig,
                                    [functionInputTable],
                                    functionOutputTable)
    expectedquery = """BEGIN TRANSACTION;
DROP TABLE IF EXISTS "dss.dss_nerevaluator";
CREATE DIMENSION TABLE "dss.dss_nerevaluator" AS
SELECT * FROM NEREVALUATOR (
ON dss.ner_sports_test PARTITION BY id
TEXTCOLUMN('content')
MODEL('ner_model.bin')
);
COMMIT;
END TRANSACTION;"""
    self.assertEqual('\n'.join(actualquery) + '\nEND TRANSACTION;',
                     expectedquery, 'NerEvaluator')
def testNaiveBayes(self):
    inputConnectionConfig = {
        'table': 'dss_nb_iris_dataset_train',
        'schema': 'public'
    }
    outputConnectionConfig = {
        'table': 'naivebayestrain_out',
        'schema': 'dss'
    }
    functionInputTable = inputtableinfo(inputConnectionConfig,
                                        'dss_nb_iris_dataset_train',
                                        naiveBayesTrainConfig)
    functionOutputTable = outputtableinfo(outputConnectionConfig,
                                          'naivebayestrain_out',
                                          naiveBayesTrainConfig)
    actualquery = getFunctionsQuery(naiveBayesTrainConfig,
                                    [functionInputTable],
                                    functionOutputTable)
    expectedquery = '''BEGIN TRANSACTION;
DROP TABLE IF EXISTS "dss.naivebayestrain_out";
CREATE DIMENSION TABLE "dss.naivebayestrain_out" AS
(SELECT * FROM NAIVEBAYESREDUCE (
ON (SELECT * FROM NAIVEBAYESMAP (
ON public.dss_nb_iris_dataset_train
NUMERICINPUTS('sepal_length', 'sepal_width', 'petal_length', 'petal_width')
RESPONSE('species')
))
PARTITION BY class
));
COMMIT;
END TRANSACTION;'''
    self.assertEqual(
        '\n'.join(actualquery + ['END TRANSACTION;']), expectedquery,
        "test CASCADED FUNCTIONS NAIVEBAYESMAP AND NAIVEBAYESREDUCE")
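# The cascaded map/reduce assertion above relies on getFunctionsQuery()
# nesting one SQL-MR call inside another's ON clause. Skeleton of the
# generated SQL (identifiers elided, shown for orientation only):
#
#   CREATE DIMENSION TABLE "<out>" AS
#   (SELECT * FROM NAIVEBAYESREDUCE (
#       ON (SELECT * FROM NAIVEBAYESMAP (ON <input_table> ...))
#       PARTITION BY class
#   ));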
def testNpath(self):
    inputConnectionConfig = {'table': 'tv_shows'}
    outputConnectionConfig = {
        'table': 'dss_tv_show_npath',
        'schema': 'dss'
    }
    functionInputTable = inputtableinfo(inputConnectionConfig, 'tv_shows',
                                        npathConfig)
    functionOutputTable = outputtableinfo(outputConnectionConfig,
                                          'dss_tv_show_npath', npathConfig)
    actualquery = getFunctionsQuery(npathConfig, [functionInputTable],
                                    functionOutputTable)
    expectedquery = '''CREATE DIMENSION TABLE "dss.dss_tv_show_npath" AS
SELECT * FROM NPATH (
ON public.tv_shows PARTITION BY id ORDER BY ts
MODE('nonoverlapping')
PATTERN('S1{1,3}.S2')
SYMBOLS(tvshow='BreakingBad' as S2, tvshow <> 'BreakingBad' as S1)
RESULT(accumulate(tvshow of any(S1,S2)) as path)
);'''
    self.assertEqual(actualquery[2], expectedquery, "Npath")
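# All of the assertions above assume getFunctionsQuery() returns a *list* of
# SQL statements rather than one string. A hypothetical illustration of the
# assumed shape (placeholder values, not real plugin output):
#
#   statements = [
#       'BEGIN TRANSACTION;',
#       'DROP TABLE IF EXISTS "dss.example_out";',
#       'CREATE DIMENSION TABLE "dss.example_out" AS ...',
#       'COMMIT;',
#   ]
#
# Hence actualquery[2] addresses the CREATE statement on its own, and
# '\n'.join(actualquery) + '\nEND TRANSACTION;' reconstructs the full script
# compared against expectedquery.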
def asterDo():
    """
    Takes the parameters set by the user from the UI, creates the query,
    and then executes it.
    """
    # Formatting options.
    SEP_LENGTH = 80
    SEP = "=" * SEP_LENGTH

    # Recipe inputs
    main_input_name = get_input_names_for_role('main')[0]
    input_dataset = dataiku.Dataset(main_input_name)

    # Recipe outputs
    main_output_name = get_output_names_for_role('main')[0]
    output_dataset = dataiku.Dataset(main_output_name)

    # Dataiku DSS params
    client = dataiku.api_client()
    projectkey = main_input_name.split('.')[0]
    project = client.get_project(projectkey)

    # Connection properties.
    # TODO: Handle Input and Output Table connection properties.
    properties = input_dataset.get_location_info(
        sensitive_info=True)['info'].get('connectionParams').get('properties')
    autocommit = input_dataset.get_location_info(
        sensitive_info=True)['info'].get('connectionParams').get(
            'autocommitMode')

    # SQL Executor.
    executor = SQLExecutor2(dataset=input_dataset)

    # Handle pre- and post-query additions.
    # Assume autocommit TERA mode by default.
    pre_query = None
    post_query = None

    print(SEP)
    if not autocommit:
        print("NOT AUTOCOMMIT MODE.")
        print("Assuming TERA mode.")
        pre_query = ["BEGIN TRANSACTION;"]
        post_query = ["END TRANSACTION;"]
        for prop in properties:
            if prop['name'] == 'TMODE':
                if prop['value'] == 'ANSI':
                    print("ANSI mode.")
                    pre_query = [";"]
                    post_query = ["COMMIT WORK;"]
    else:
        print("AUTOCOMMIT MODE.")
        print("No pre- and post-query.")
    print(SEP)

    # Recipe function param
    dss_function = get_recipe_config().get('function', None)
    pp = pprint.PrettyPrinter(indent=4)
    print(SEP)
    print('DSS Function:')
    pp.pprint(dss_function)
    print(SEP)

    # Output dataset
    try:
        outputTable = outputtableinfo(
            output_dataset.get_location_info()['info'], main_output_name,
            get_recipe_config() or {})
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings for output table."""
            """ This plugin only supports Teradata tables.""")

    # Input datasets
    try:
        main_input_names = get_input_names_for_role('main')
        inputTables = []
        for inputname in main_input_names:
            inconnectioninfo = dataiku.Dataset(
                inputname).get_location_info()['info']
            inTable = inputtableinfo(inconnectioninfo, inputname,
                                     dss_function)
            inputTables.append(inTable)
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings from one of the input tables."""
            """ This plugin only supports Teradata tables.""")

    # Handle dropping of output tables.
    if dss_function.get('dropIfExists', False):
        print("Preparing to drop tables.")
        drop_query = dropTableStatement(outputTable)
        print(SEP)
        print("DROP query:")
        print(drop_query)
        print(SEP)
        try:
            # dataiku's query_to_df pre_queries parameter seems to not work.
            # This is a workaround to ensure that the "BEGIN TRANSACTION;"
            # block applies for non-autocommit TERA mode connections.
            if not autocommit:
                executor.query_to_df(pre_query)
            executor.query_to_df(drop_query, post_queries=post_query)
        except Exception as e:
            print(e)

        # Drop other output tables if they exist.
        drop_all_query = getDropOutputTableArgumentsStatements(
            dss_function.get('output_tables', []))
        print(SEP)
        print('Drop ALL Query:')
        print(drop_all_query)
        print(SEP)
        for drop_q in drop_all_query:
            # Same transaction workaround as above.
            if not autocommit:
                executor.query_to_df(pre_query)
            executor.query_to_df(drop_q, post_queries=post_query)

    # CREATE query.
    create_query = getFunctionsQuery(dss_function, inputTables, outputTable,
                                     get_recipe_config() or {})
    print(SEP)
    print("CREATE query:")
    print(create_query)
    print(SEP)

    # Detect error.
    try:
        # Same transaction workaround as above.
        if not autocommit:
            executor.query_to_df(pre_query)
        executor.query_to_df(create_query, post_queries=post_query)
    except Exception as error:
        err_str = str(error)
        err_str_list = err_str.split(" ")
        # Shorten the error for the modal in the front-end.
        if len(err_str_list) > 15:
            print(SEP)
            print(error)
            print(SEP)
            new_err_str = err_str_list[:15]
            new_err_str.append("\n\n")
            new_err_str.append("...")
            new_err_str = " ".join(new_err_str)
            raise RuntimeError(new_err_str)
        else:
            raise RuntimeError(err_str)

    print('Moving results to output...')
    select_sample_query = 'SELECT * from ' + outputTable.tablename + \
                          ' SAMPLE 0'
    # Same transaction workaround as above.
    if not autocommit:
        executor.query_to_df(pre_query)
    sel_res = executor.query_to_df(select_sample_query,
                                   post_queries=post_query)
    pythonrecipe_out = output_dataset
    pythonrecipe_out.write_schema_from_dataframe(sel_res)

    # Additional output tables.
    outtables = dss_function.get('output_tables', [])
    if outtables != []:
        tableCounter = 1
        print('Working on output tables')
        print(get_output_names_for_role('main'))
        print(outtables)
        for table in outtables:
            if table.get('value') != '' and table.get('value') is not None:
                print('Table')
                print(table)
                # Match on the last dot-separated component so that table
                # names with or without a database prefix resolve to the
                # right output dataset. (Change below might fix issue 4 of
                # Jan 4 2018 for Nico, for non-drop tables.)
                try:
                    main_output_name2 = list(
                        filter(
                            lambda out_dataset: out_dataset.split('.')[-1] ==
                            table.get('value').split('.')[-1].strip('"'),
                            get_output_names_for_role('main')))[0]
                except Exception as error:
                    raise RuntimeError(
                        'Unable to find an output dataset for ' +
                        table.get('value') +
                        '. It may not exist as an output dataset: ' +
                        table.get('value') + "\nRuntime Error: " +
                        str(error))
                print('Output name 2')
                print(main_output_name2)
                output_dataset2 = dataiku.Dataset(main_output_name2)
                tableNamePrefix = output_dataset2.get_location_info(
                    sensitive_info=True)['info'].get('connectionParams').get(
                        'namingRule').get('tableNameDatasetNamePrefix')
                if tableNamePrefix is not None and tableNamePrefix != '':
                    print('Table prefix is not empty: ' + tableNamePrefix)
                customOutputTableSQL = 'SELECT * from ' + table.get(
                    'value') + ' SAMPLE 0'
                print('Working on table number:')
                print(tableCounter)
                print(customOutputTableSQL)
                # Same transaction workaround as above.
                if not autocommit:
                    executor.query_to_df(pre_query)
                sel_res = executor.query_to_df(customOutputTableSQL,
                                               post_queries=post_query)
                tableCounter += 1
                pythonrecipe_out2 = output_dataset2
                pythonrecipe_out2.write_schema_from_dataframe(sel_res)
    print('Complete!')
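# The "if not autocommit: executor.query_to_df(pre_query)" pattern repeats
# throughout asterDo(). A hypothetical helper (not part of the plugin) that
# captures the same workaround in one place:
def run_with_transaction_workaround(executor, statement, pre_query,
                                    post_query, autocommit):
    """Run `statement`, first opening a transaction manually when the
    connection is a non-autocommit connection, since passing pre_queries
    to query_to_df did not reliably take effect."""
    if not autocommit:
        executor.query_to_df(pre_query)
    return executor.query_to_df(statement, post_queries=post_query)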
def asterDo():
    # Recipe inputs
    main_input_name = get_input_names_for_role('main')[0]
    input_dataset = dataiku.Dataset(main_input_name)

    # Connection properties.
    properties = input_dataset.get_location_info(
        sensitive_info=True)['info'].get('connectionParams').get('properties')
    autocommit = input_dataset.get_location_info(
        sensitive_info=True)['info'].get('connectionParams').get(
            'autocommitMode')

    # Transaction handling: assume TERA mode by default.
    requiresTransactions = True
    print('Assuming TERA mode by default')
    if autocommit:
        print('Assuming autocommit TERA mode')
        requiresTransactions = False
        tmode = 'TERA'
        stTxn = ';'
        edTxn = ';'
    else:
        # Detected non-autocommit TERA.
        print('Assuming non-autocommit TERA mode')
        tmode = 'TERA'
        stTxn = 'BEGIN TRANSACTION;'
        edTxn = 'END TRANSACTION;'
    for prop in properties:
        if prop['name'] == 'TMODE' and prop['value'] == 'TERA':
            if autocommit:
                print('Autocommit TERA mode')
                requiresTransactions = False
                tmode = 'TERA'
                stTxn = ';'
                edTxn = ';'
            else:
                # Detected TERA.
                print('TERA mode')
                tmode = 'TERA'
                stTxn = 'BEGIN TRANSACTION;'
                edTxn = 'END TRANSACTION;'
        elif prop['name'] == 'TMODE' and prop['value'] == 'ANSI':
            # Detected ANSI.
            print('ANSI mode')
            tmode = 'ANSI'
            stTxn = ';'
            edTxn = 'COMMIT WORK;'
    print(properties)

    # Recipe outputs
    main_output_name = get_output_names_for_role('main')[0]
    output_dataset = dataiku.Dataset(main_output_name)

    # Recipe function param
    dss_function = get_recipe_config().get('function', None)
    print("\n=================================\n")
    print('Showing DSS Function')
    print(dss_function)
    print("\n=================================\n")

    # Dataiku DSS params
    client = dataiku.api_client()
    projectkey = main_input_name.split('.')[0]
    project = client.get_project(projectkey)

    # Output dataset
    try:
        outputTable = outputtableinfo(
            output_dataset.get_location_info()['info'], main_output_name,
            get_recipe_config() or {})
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings for output table."""
            """ This plugin only supports Teradata tables.""")

    # Input datasets
    try:
        main_input_names = get_input_names_for_role('main')
        inputTables = []
        for inputname in main_input_names:
            inconnectioninfo = dataiku.Dataset(
                inputname).get_location_info()['info']
            inTable = inputtableinfo(inconnectioninfo, inputname,
                                     dss_function)
            inputTables.append(inTable)
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings from one of the input tables."""
            """ This plugin only supports Teradata tables.""")

    # Actual query.
    query = getFunctionsQuery(dss_function, inputTables, outputTable,
                              get_recipe_config() or {})

    executor = SQLExecutor2(dataset=input_dataset)
    if dss_function.get('dropIfExists', False):
        dropTableStr = dropTableStatement(outputTable)
        if requiresTransactions:
            print('Start transaction for drop')
            print(stTxn)
            executor.query_to_df(stTxn)
            print('End transaction for drop')
            print(edTxn)
            executor.query_to_df(edTxn, [dropTableStr])
        else:
            executor.query_to_df(dropTableStr)

        # Drop each additional output table separately.
        dropAllQuery = getDropOutputTableArgumentsStatements(
            dss_function.get('output_tables', []))
        print('Drop ALL Query:')
        print(dropAllQuery)
        if requiresTransactions:
            for dropTblStmt in dropAllQuery:
                print('Start transaction for drop')
                print(stTxn)
                executor.query_to_df(stTxn)
                print('End transaction for drop')
                print(edTxn)
                executor.query_to_df(edTxn, [dropTblStmt])
        else:
            for dropTblStmt in dropAllQuery:
                executor.query_to_df(dropTblStmt)

    print('\n'.join(query))

    if requiresTransactions:
        print('Start transaction for create table')
        print(stTxn)
        executor.query_to_df(stTxn)

    # Detect error.
    try:
        selectResult = executor.query_to_df(query)
    except Exception as error:
        err_str = str(error)
        err_str_list = err_str.split(" ")
        # Shorten the error for the modal in the front-end.
        if len(err_str_list) > 15:
            print("\n=================================\n")
            print(error)
            print("\n=================================\n")
            new_err_str = err_str_list[:15]
            new_err_str.append("\n\n")
            new_err_str.append("...")
            new_err_str = " ".join(new_err_str)
            raise RuntimeError(new_err_str)
        else:
            raise RuntimeError(err_str)

    if requiresTransactions:
        print('End transaction for create table')
        print(edTxn)
        executor.query_to_df(edTxn)

    print('Moving results to output...')
    if requiresTransactions:
        print('Start transaction for schema building')
        print(stTxn)
        executor.query_to_df(stTxn)

    customOutputTableSQL = 'SELECT * from ' + outputTable.tablename + \
                           ' SAMPLE 0'
    selRes = executor.query_to_df(customOutputTableSQL)
    pythonrecipe_out = output_dataset
    pythonrecipe_out.write_schema_from_dataframe(selRes)

    # Additional output tables.
    outtables = dss_function.get('output_tables', [])
    if outtables != []:
        tableCounter = 1
        print('Working on output tables')
        print(get_output_names_for_role('main'))
        print(outtables)
        for table in outtables:
            if table.get('value') != '' and table.get('value') is not None:
                print('Table')
                print(table)
                # Match on the last dot-separated component so that table
                # names with or without a database prefix resolve to the
                # right output dataset. (Change below might fix issue 4 of
                # Jan 4 2018 for Nico, for non-drop tables.)
                try:
                    main_output_name2 = list(
                        filter(
                            lambda out_dataset: out_dataset.split('.')[-1] ==
                            table.get('value').split('.')[-1].strip('"'),
                            get_output_names_for_role('main')))[0]
                except Exception as error:
                    raise RuntimeError(
                        'Unable to find an output dataset for ' +
                        table.get('value') +
                        '. It may not exist as an output dataset: ' +
                        table.get('value') + "\nRuntime Error: " +
                        str(error))
                print('Output name 2')
                print(main_output_name2)
                output_dataset2 = dataiku.Dataset(main_output_name2)
                tableNamePrefix = output_dataset2.get_location_info(
                    sensitive_info=True)['info'].get('connectionParams').get(
                        'namingRule').get('tableNameDatasetNamePrefix')
                if tableNamePrefix is not None and tableNamePrefix != '':
                    print('Table prefix is not empty: ' + tableNamePrefix)
                customOutputTableSQL = 'SELECT * from ' + table.get(
                    'value') + ' SAMPLE 0'
                print('Working on table number:')
                print(tableCounter)
                print(customOutputTableSQL)
                selRes = executor.query_to_df(customOutputTableSQL)
                tableCounter += 1
                pythonrecipe_out2 = output_dataset2
                pythonrecipe_out2.write_schema_from_dataframe(selRes)

    if requiresTransactions:
        print('End transaction for schema building')
        print(edTxn)
        executor.query_to_df(edTxn)
    print('Complete!')
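# The lambda used to resolve output datasets above is dense; a hypothetical
# standalone equivalent (not part of the plugin) makes the matching rule
# explicit and is easier to unit-test:
def find_output_dataset(table_value, output_names):
    """Match a (possibly schema-qualified, possibly quoted) table name to a
    DSS output dataset by comparing the last dot-separated component."""
    wanted = table_value.split('.')[-1].strip('"')
    for name in output_names:
        if name.split('.')[-1] == wanted:
            return name
    raise RuntimeError('Unable to find an output dataset for ' + table_value)

# Example: find_output_dataset('dss."my_table"', ['PROJ.my_table'])
# returns 'PROJ.my_table'.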
def createTable(self, config, metadata, mode='input'):
    if mode == 'output':
        return outputtableinfo(config, config.get('dataset', ""), metadata)
    return inputtableinfo(config, config.get('table', ""), metadata)
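# Example usage (hypothetical configs, mirroring the test fixtures above):
#
#   in_table = self.createTable(
#       {'table': 'ner_sports_test', 'schema': 'dss'}, nerConfig)
#   out_table = self.createTable(
#       {'dataset': 'dss_ner', 'schema': 'dss'}, nerConfig, mode='output')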