def asterDo():
    # Recipe inputs
    main_input_name = get_input_names_for_role('main')[0]
    input_dataset = dataiku.Dataset(main_input_name)

    # Recipe outputs
    main_output_name = get_output_names_for_role('main')[0]
    output_dataset = dataiku.Dataset(main_output_name)

    # Recipe function param
    dss_function = get_recipe_config().get('function', None)

    # Dataiku DSS params
    client = dataiku.api_client()
    projectkey = main_input_name.split('.')[0]
    project = client.get_project(projectkey)

    # Output dataset
    try:
        outputTable = outputtableinfo(
            output_dataset.get_location_info()['info'], main_output_name,
            get_recipe_config() or {})
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings for output table."""
            """ Make sure the connection setting is set to 'Read a database table'."""
            """ This plugin only supports Aster tables.""")

    # Input datasets
    try:
        main_input_names = get_input_names_for_role('main')
        inputTables = []
        for inputname in main_input_names:
            inconnectioninfo = dataiku.Dataset(
                inputname).get_location_info()['info']
            inTable = inputtableinfo(inconnectioninfo, inputname,
                                     dss_function)
            inputTables.append(inTable)
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings from one of the input tables."""
            """ Make sure the connection setting is set to 'Read a database table'."""
            """ This plugin only supports Aster tables.""")

    # Build and run the actual function query.
    query = getFunctionsQuery(dss_function, inputTables, outputTable)
    print('\n'.join(query))
    executor = SQLExecutor2(dataset=input_dataset)

    if dss_function.get('dropIfExists', False):
        # The DROP statements are passed as pre_queries so they run before
        # the closing END TRANSACTION.
        dropAllQuery = getDropOutputTableArgumentsStatements(
            dss_function.get('arguments', []))
        executor.query_to_df('END TRANSACTION;', dropAllQuery)
    executor.query_to_df("END TRANSACTION;", pre_queries=query)

    # Write the output table schema: sample one row and register every
    # column as a string.
    nQuery = '''SELECT * FROM {} LIMIT (1);'''.format(outputTable.tablename)
    selectResult = executor.query_to_df(nQuery)
    output_schema = []
    for column in selectResult.columns:
        output_schema.append({"name": column, "type": "string"})
    output_dataset.write_schema(output_schema)
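# A note on the schema step above: the recipe deliberately types every output
# column as a string instead of mapping Aster types to DSS types; DSS
# re-infers types when the dataset is next read. A minimal sketch of the same
# pattern in isolation (the function name is hypothetical; `executor`,
# `output_dataset`, and `tablename` stand in for the objects built in
# asterDo()):
def write_all_string_schema(executor, output_dataset, tablename):
    # Sample a single row purely to learn the column names.
    sample = executor.query_to_df(
        'SELECT * FROM {} LIMIT 1;'.format(tablename))
    # Register every column as a string.
    output_dataset.write_schema(
        [{"name": col, "type": "string"} for col in sample.columns])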
def testCanopy(self):
    inputConnectionConfig = {
        'table': 'computers_train1',
        'schema': 'public'
    }
    outputConnectionConfig = {'table': 'canopy_out', 'schema': 'dss'}
    functionInputTable = inputtableinfo(inputConnectionConfig,
                                        'computers_train1_public',
                                        canopyConfig)
    functionOutputTable = outputtableinfo(outputConnectionConfig,
                                          'canopy_out', canopyConfig)
    actualquery = getFunctionsQuery(canopyConfig, [functionInputTable],
                                    functionOutputTable)
    expectedquery = '''CREATE DIMENSION TABLE "dss.canopy_out" AS
SELECT * FROM CANOPY (
ON (SELECT 1) PARTITION BY 1
INPUTTABLE('computers_train1')
LOOSEDISTANCE('1000')
TIGHTDISTANCE('500')
);'''
    self.assertEqual(actualquery[2], expectedquery,
                     "test TABLE_NAME argument datatype")
def testEvaluateNamedEntityFinderPartition(self):
    testInputConnectionConfig = {
        'table': 'textclassifier_input',
        'schema': 'dss'
    }
    testOutputConnectionConfig = {'table': 'dss_test'}
    testfunction = dssFunction()
    testConfig = testfunction.name(
        'EVALUATENAMEDENTITYFINDERPARTITION').unaliased_inputs_count(
            1).add_unaliased_input('textclassifier_input',
                                   'PartitionByOne').build()
    functionInputTable = inputtableinfo(testInputConnectionConfig,
                                        'textclassifier_input', testConfig)
    functionOutputTable = outputtableinfo(testOutputConnectionConfig,
                                          'dss_test', testConfig)
    actualquery = getFunctionsQuery(testConfig, [functionInputTable],
                                    functionOutputTable)
    expectedquery = """BEGIN TRANSACTION;
DROP TABLE IF EXISTS "public.dss_test";
CREATE DIMENSION TABLE "public.dss_test" AS
SELECT * FROM EVALUATENAMEDENTITYFINDERPARTITION (
ON dss.textclassifier_input PARTITION BY 1
);
COMMIT;
END TRANSACTION;"""
    self.assertEqual('\n'.join(actualquery + ['END TRANSACTION;']),
                     expectedquery, 'EvaluateNamedEntityFinderPartition')
def testNertrainer(self):
    testInputConnectionConfig = {
        'table': 'ner_sports_train',
        'schema': 'dss'
    }
    testOutputConnectionConfig = {
        'table': 'dss_nertrainer',
        'schema': 'dss'
    }
    functionInputTable = inputtableinfo(testInputConnectionConfig,
                                        'ner_sports_train',
                                        nertrainerConfig)
    functionOutputTable = outputtableinfo(testOutputConnectionConfig,
                                          'dss_nertrainer',
                                          nertrainerConfig)
    actualquery = getFunctionsQuery(nertrainerConfig, [functionInputTable],
                                    functionOutputTable)
    expectedquery = """BEGIN TRANSACTION;
DROP TABLE IF EXISTS "dss.dss_nertrainer";
CREATE DIMENSION TABLE "dss.dss_nertrainer" AS
SELECT * FROM NERTRAINER (
ON dss.ner_sports_train PARTITION BY id
TEXTCOLUMN('content')
MODELFILE('ner_model.bin')
MAXITERATION('1000')
FEATURETEMPLATE('template_1.txt')
);
COMMIT;
END TRANSACTION;"""
    self.assertEqual('\n'.join(actualquery) + '\nEND TRANSACTION;',
                     expectedquery, 'Nertrainer test INTEGER argument')
def testTextParser(self):
    testInputConnectionConfig = {
        'table': 'textparser_input',
        'schema': 'dss'
    }
    testOutputConnectionConfig = {
        'table': 'textparser_output',
        'schema': 'dss'
    }
    functionInputTable = inputtableinfo(testInputConnectionConfig,
                                        'textparser_input',
                                        textParserConfig)
    functionOutputTable = outputtableinfo(testOutputConnectionConfig,
                                          'textparser_output',
                                          textParserConfig)
    actualquery = getFunctionsQuery(textParserConfig, [functionInputTable],
                                    functionOutputTable)
    expectedquery = """CREATE DIMENSION TABLE "dss.textparser_output" AS
SELECT * FROM TEXT_PARSER (
ON dss.textparser_input PARTITION BY ANY
TEXT_COLUMN('text_data')
CASE_INSENSITIVE('True')
OUTPUT_BY_WORD('True')
REMOVE_STOP_WORDS('True')
STOP_WORDS('stopwords.txt')
PUNCTUATION('\[.,?\!\]')
LIST_POSITIONS('True')
ACCUMULATE('doc_id', 'category')
);"""
    self.assertEqual(actualquery[2], expectedquery,
                     'test boolean arguments')
def testTextTokenizer(self):
    textTokenizerOutputConnectionConfig = {'table': 'text_tokenized'}
    textTokenizerInputConnectionConfig = {
        'table': 'text_chunked',
        'schema': 'bs186029'
    }
    functionInputTable = inputtableinfo(textTokenizerInputConnectionConfig,
                                        'text_chunked',
                                        textTokenizerConfig)
    functionOutputTable = outputtableinfo(
        textTokenizerOutputConnectionConfig, 'text_tokenized',
        textTokenizerConfig)
    actualquery = getFunctionsQuery(textTokenizerConfig,
                                    [functionInputTable],
                                    functionOutputTable)
    expectedquery = """BEGIN TRANSACTION;
DROP TABLE IF EXISTS "public.text_tokenized";
CREATE DIMENSION TABLE "public.text_tokenized" AS
SELECT * FROM TEXTTOKENIZER (
ON bs186029.text_chunked PARTITION BY ANY
TEXTCOLUMN('chunk')
);
COMMIT;
END TRANSACTION;"""
    self.assertEqual('\n'.join(actualquery) + '\nEND TRANSACTION;',
                     expectedquery, "TextTokenizer")
def testNer(self):
    testInputConnectionConfig = {
        'table': 'ner_sports_test',
        'schema': 'dss'
    }
    testRuleConnectionConfig = {'table': 'rule_table1', 'schema': 'dss'}
    testOutputConnectionConfig = {'table': 'dss_ner', 'schema': 'dss'}
    functionInputTable = inputtableinfo(testInputConnectionConfig,
                                        'ner_sports_test', nerConfig)
    ruleTable = inputtableinfo(testRuleConnectionConfig, 'rule_table',
                               nerConfig)
    functionOutputTable = outputtableinfo(testOutputConnectionConfig,
                                          'dss_ner', nerConfig)
    actualquery = getFunctionsQuery(nerConfig,
                                    [functionInputTable, ruleTable],
                                    functionOutputTable)
    expectedquery = """BEGIN TRANSACTION;
DROP TABLE IF EXISTS "dss.dss_ner";
CREATE DIMENSION TABLE "dss.dss_ner" AS
SELECT * FROM NER (
ON dss.ner_sports_test PARTITION BY ANY
ON dss.rule_table1 AS "rules" DIMENSION
TEXTCOLUMN('content')
MODELS('ner_model.bin')
ACCUMULATE('id')
SHOWCONTEXT('2')
);
COMMIT;
END TRANSACTION;"""
    self.assertEqual('\n'.join(actualquery) + '\nEND TRANSACTION;',
                     expectedquery, 'Ner')
def testNerEvaluator(self):
    testInputConnectionConfig = {
        'table': 'ner_sports_test',
        'schema': 'dss'
    }
    testOutputConnectionConfig = {
        'table': 'dss_nerevaluator',
        'schema': 'dss'
    }
    functionInputTable = inputtableinfo(testInputConnectionConfig,
                                        'ner_sports_test',
                                        nerEvaluatorConfig)
    functionOutputTable = outputtableinfo(testOutputConnectionConfig,
                                          'dss_ner', nerEvaluatorConfig)
    actualquery = getFunctionsQuery(nerEvaluatorConfig,
                                    [functionInputTable],
                                    functionOutputTable)
    expectedquery = """BEGIN TRANSACTION;
DROP TABLE IF EXISTS "dss.dss_nerevaluator";
CREATE DIMENSION TABLE "dss.dss_nerevaluator" AS
SELECT * FROM NEREVALUATOR (
ON dss.ner_sports_test PARTITION BY id
TEXTCOLUMN('content')
MODEL('ner_model.bin')
);
COMMIT;
END TRANSACTION;"""
    self.assertEqual('\n'.join(actualquery) + '\nEND TRANSACTION;',
                     expectedquery, 'NerEvaluator')
def testNaiveBayes(self):
    inputConnectionConfig = {
        'table': 'dss_nb_iris_dataset_train',
        'schema': 'public'
    }
    outputConnectionConfig = {
        'table': 'naivebayestrain_out',
        'schema': 'dss'
    }
    functionInputTable = inputtableinfo(inputConnectionConfig,
                                        'dss_nb_iris_dataset_train',
                                        naiveBayesTrainConfig)
    functionOutputTable = outputtableinfo(outputConnectionConfig,
                                          'naivebayestrain_out',
                                          naiveBayesTrainConfig)
    actualquery = getFunctionsQuery(naiveBayesTrainConfig,
                                    [functionInputTable],
                                    functionOutputTable)
    expectedquery = '''BEGIN TRANSACTION;
DROP TABLE IF EXISTS "dss.naivebayestrain_out";
CREATE DIMENSION TABLE "dss.naivebayestrain_out" AS
(SELECT * FROM NAIVEBAYESREDUCE (
ON (SELECT * FROM NAIVEBAYESMAP (
ON public.dss_nb_iris_dataset_train
NUMERICINPUTS('sepal_length', 'sepal_width', 'petal_length', 'petal_width')
RESPONSE('species')
))
PARTITION BY class
));
COMMIT;
END TRANSACTION;'''
    self.assertEqual(
        '\n'.join(actualquery + ['END TRANSACTION;']), expectedquery,
        "test CASCADED FUNCTIONS NAIVEBAYESMAP AND NAIVEBAYESREDUCE")
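# The cascaded map/reduce assertion above relies on getFunctionsQuery()
# nesting one SQL-MR call inside another's ON clause. Skeleton of the
# generated SQL (identifiers elided, shown for orientation only):
#
#   CREATE DIMENSION TABLE "<out>" AS
#   (SELECT * FROM NAIVEBAYESREDUCE (
#       ON (SELECT * FROM NAIVEBAYESMAP (ON <input_table> ...))
#       PARTITION BY class
#   ));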
def testNpath(self):
    inputConnectionConfig = {'table': 'tv_shows'}
    outputConnectionConfig = {
        'table': 'dss_tv_show_npath',
        'schema': 'dss'
    }
    functionInputTable = inputtableinfo(inputConnectionConfig, 'tv_shows',
                                        npathConfig)
    functionOutputTable = outputtableinfo(outputConnectionConfig,
                                          'dss_tv_show_npath', npathConfig)
    actualquery = getFunctionsQuery(npathConfig, [functionInputTable],
                                    functionOutputTable)
    expectedquery = '''CREATE DIMENSION TABLE "dss.dss_tv_show_npath" AS
SELECT * FROM NPATH (
ON public.tv_shows PARTITION BY id ORDER BY ts
MODE('nonoverlapping')
PATTERN('S1{1,3}.S2')
SYMBOLS(tvshow='BreakingBad' as S2, tvshow <> 'BreakingBad' as S1)
RESULT(accumulate(tvshow of any(S1,S2)) as path)
);'''
    self.assertEqual(actualquery[2], expectedquery, "Npath")
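# All of the assertions above assume getFunctionsQuery() returns a *list* of
# SQL statements rather than one string. A hypothetical illustration of the
# assumed shape (placeholder values, not real plugin output):
#
#   statements = [
#       'BEGIN TRANSACTION;',
#       'DROP TABLE IF EXISTS "dss.example_out";',
#       'CREATE DIMENSION TABLE "dss.example_out" AS ...',
#       'COMMIT;',
#   ]
#
# Hence actualquery[2] addresses the CREATE statement on its own, and
# '\n'.join(actualquery) + '\nEND TRANSACTION;' reconstructs the full script
# compared against expectedquery.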
def asterDo():
    """
    Takes the parameters set by the user from the UI, creates the query,
    and then executes it.
    """
    # Formatting options.
    SEP_LENGTH = 80
    SEP = "=" * SEP_LENGTH

    # Recipe inputs
    main_input_name = get_input_names_for_role('main')[0]
    input_dataset = dataiku.Dataset(main_input_name)

    # Recipe outputs
    main_output_name = get_output_names_for_role('main')[0]
    output_dataset = dataiku.Dataset(main_output_name)

    # Dataiku DSS params
    client = dataiku.api_client()
    projectkey = main_input_name.split('.')[0]
    project = client.get_project(projectkey)

    # Connection properties.
    # TODO: Handle Input and Output Table connection properties.
    properties = input_dataset.get_location_info(
        sensitive_info=True)['info'].get('connectionParams').get('properties')
    autocommit = input_dataset.get_location_info(
        sensitive_info=True)['info'].get('connectionParams').get(
            'autocommitMode')

    # SQL Executor.
    executor = SQLExecutor2(dataset=input_dataset)

    # Handle pre- and post-query additions.
    # Assume autocommit TERA mode by default.
    pre_query = None
    post_query = None

    print(SEP)
    if not autocommit:
        print("NOT AUTOCOMMIT MODE.")
        print("Assuming TERA mode.")
        pre_query = ["BEGIN TRANSACTION;"]
        post_query = ["END TRANSACTION;"]
        for prop in properties:
            if prop['name'] == 'TMODE':
                if prop['value'] == 'ANSI':
                    print("ANSI mode.")
                    pre_query = [";"]
                    post_query = ["COMMIT WORK;"]
    else:
        print("AUTOCOMMIT MODE.")
        print("No pre- and post-query.")
    print(SEP)

    # Recipe function param
    dss_function = get_recipe_config().get('function', None)
    pp = pprint.PrettyPrinter(indent=4)
    print(SEP)
    print('DSS Function:')
    pp.pprint(dss_function)
    print(SEP)

    # Output dataset
    try:
        outputTable = outputtableinfo(
            output_dataset.get_location_info()['info'], main_output_name,
            get_recipe_config() or {})
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings for output table."""
            """ This plugin only supports Teradata tables.""")

    # Input datasets
    try:
        main_input_names = get_input_names_for_role('main')
        inputTables = []
        for inputname in main_input_names:
            inconnectioninfo = dataiku.Dataset(
                inputname).get_location_info()['info']
            inTable = inputtableinfo(inconnectioninfo, inputname,
                                     dss_function)
            inputTables.append(inTable)
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings from one of the input tables."""
            """ This plugin only supports Teradata tables.""")

    # Handle dropping of output tables.
    if dss_function.get('dropIfExists', False):
        print("Preparing to drop tables.")
        drop_query = dropTableStatement(outputTable)
        print(SEP)
        print("DROP query:")
        print(drop_query)
        print(SEP)
        try:
            # dataiku's query_to_df pre_queries parameter seems to not work.
            # This is a workaround to ensure that the "BEGIN TRANSACTION;"
            # block applies for non-autocommit TERA mode connections.
            if not autocommit:
                executor.query_to_df(pre_query)
            executor.query_to_df(drop_query, post_queries=post_query)
        except Exception as e:
            print(e)

        # Drop other output tables if they exist.
        drop_all_query = getDropOutputTableArgumentsStatements(
            dss_function.get('output_tables', []))
        print(SEP)
        print('Drop ALL Query:')
        print(drop_all_query)
        print(SEP)
        for drop_q in drop_all_query:
            # Same transaction workaround as above.
            if not autocommit:
                executor.query_to_df(pre_query)
            executor.query_to_df(drop_q, post_queries=post_query)

    # CREATE query.
    create_query = getFunctionsQuery(dss_function, inputTables, outputTable,
                                     get_recipe_config() or {})
    print(SEP)
    print("CREATE query:")
    print(create_query)
    print(SEP)

    # Detect error.
    try:
        # Same transaction workaround as above.
        if not autocommit:
            executor.query_to_df(pre_query)
        executor.query_to_df(create_query, post_queries=post_query)
    except Exception as error:
        err_str = str(error)
        err_str_list = err_str.split(" ")
        # Shorten the error for the modal in the front-end.
        if len(err_str_list) > 15:
            print(SEP)
            print(error)
            print(SEP)
            new_err_str = err_str_list[:15]
            new_err_str.append("\n\n")
            new_err_str.append("...")
            new_err_str = " ".join(new_err_str)
            raise RuntimeError(new_err_str)
        else:
            raise RuntimeError(err_str)

    print('Moving results to output...')
    select_sample_query = 'SELECT * from ' + outputTable.tablename + \
                          ' SAMPLE 0'
    # Same transaction workaround as above.
    if not autocommit:
        executor.query_to_df(pre_query)
    sel_res = executor.query_to_df(select_sample_query,
                                   post_queries=post_query)
    pythonrecipe_out = output_dataset
    pythonrecipe_out.write_schema_from_dataframe(sel_res)

    # Additional output tables.
    outtables = dss_function.get('output_tables', [])
    if outtables != []:
        tableCounter = 1
        print('Working on output tables')
        print(get_output_names_for_role('main'))
        print(outtables)
        for table in outtables:
            if table.get('value') != '' and table.get('value') is not None:
                print('Table')
                print(table)
                # Match on the last dot-separated component so that table
                # names with or without a database prefix resolve to the
                # right output dataset. (Change below might fix issue 4 of
                # Jan 4 2018 for Nico, for non-drop tables.)
                try:
                    main_output_name2 = list(
                        filter(
                            lambda out_dataset: out_dataset.split('.')[-1] ==
                            table.get('value').split('.')[-1].strip('"'),
                            get_output_names_for_role('main')))[0]
                except Exception as error:
                    raise RuntimeError(
                        'Unable to find an output dataset for ' +
                        table.get('value') +
                        '. It may not exist as an output dataset: ' +
                        table.get('value') + "\nRuntime Error: " +
                        str(error))
                print('Output name 2')
                print(main_output_name2)
                output_dataset2 = dataiku.Dataset(main_output_name2)
                tableNamePrefix = output_dataset2.get_location_info(
                    sensitive_info=True)['info'].get('connectionParams').get(
                        'namingRule').get('tableNameDatasetNamePrefix')
                if tableNamePrefix is not None and tableNamePrefix != '':
                    print('Table prefix is not empty: ' + tableNamePrefix)
                customOutputTableSQL = 'SELECT * from ' + table.get(
                    'value') + ' SAMPLE 0'
                print('Working on table number:')
                print(tableCounter)
                print(customOutputTableSQL)
                # Same transaction workaround as above.
                if not autocommit:
                    executor.query_to_df(pre_query)
                sel_res = executor.query_to_df(customOutputTableSQL,
                                               post_queries=post_query)
                tableCounter += 1
                pythonrecipe_out2 = output_dataset2
                pythonrecipe_out2.write_schema_from_dataframe(sel_res)
    print('Complete!')
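# The "if not autocommit: executor.query_to_df(pre_query)" pattern repeats
# throughout asterDo(). A hypothetical helper (not part of the plugin) that
# captures the same workaround in one place:
def run_with_transaction_workaround(executor, statement, pre_query,
                                    post_query, autocommit):
    """Run `statement`, first opening a transaction manually when the
    connection is a non-autocommit connection, since passing pre_queries
    to query_to_df did not reliably take effect."""
    if not autocommit:
        executor.query_to_df(pre_query)
    return executor.query_to_df(statement, post_queries=post_query)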
def asterDo():
    # Recipe inputs
    main_input_name = get_input_names_for_role('main')[0]
    input_dataset = dataiku.Dataset(main_input_name)

    # Connection properties.
    properties = input_dataset.get_location_info(
        sensitive_info=True)['info'].get('connectionParams').get('properties')
    autocommit = input_dataset.get_location_info(
        sensitive_info=True)['info'].get('connectionParams').get(
            'autocommitMode')

    # Transaction handling: assume TERA mode by default.
    requiresTransactions = True
    print('Assuming TERA mode by default')
    if autocommit:
        print('Assuming autocommit TERA mode')
        requiresTransactions = False
        tmode = 'TERA'
        stTxn = ';'
        edTxn = ';'
    else:
        # Detected non-autocommit TERA.
        print('Assuming non-autocommit TERA mode')
        tmode = 'TERA'
        stTxn = 'BEGIN TRANSACTION;'
        edTxn = 'END TRANSACTION;'
    for prop in properties:
        if prop['name'] == 'TMODE' and prop['value'] == 'TERA':
            if autocommit:
                print('Autocommit TERA mode')
                requiresTransactions = False
                tmode = 'TERA'
                stTxn = ';'
                edTxn = ';'
            else:
                # Detected TERA.
                print('TERA mode')
                tmode = 'TERA'
                stTxn = 'BEGIN TRANSACTION;'
                edTxn = 'END TRANSACTION;'
        elif prop['name'] == 'TMODE' and prop['value'] == 'ANSI':
            # Detected ANSI.
            print('ANSI mode')
            tmode = 'ANSI'
            stTxn = ';'
            edTxn = 'COMMIT WORK;'
    print(properties)

    # Recipe outputs
    main_output_name = get_output_names_for_role('main')[0]
    output_dataset = dataiku.Dataset(main_output_name)

    # Recipe function param
    dss_function = get_recipe_config().get('function', None)
    print("\n=================================\n")
    print('Showing DSS Function')
    print(dss_function)
    print("\n=================================\n")

    # Dataiku DSS params
    client = dataiku.api_client()
    projectkey = main_input_name.split('.')[0]
    project = client.get_project(projectkey)

    # Output dataset
    try:
        outputTable = outputtableinfo(
            output_dataset.get_location_info()['info'], main_output_name,
            get_recipe_config() or {})
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings for output table."""
            """ This plugin only supports Teradata tables.""")

    # Input datasets
    try:
        main_input_names = get_input_names_for_role('main')
        inputTables = []
        for inputname in main_input_names:
            inconnectioninfo = dataiku.Dataset(
                inputname).get_location_info()['info']
            inTable = inputtableinfo(inconnectioninfo, inputname,
                                     dss_function)
            inputTables.append(inTable)
    except Exception as error:
        raise RuntimeError(
            """Error obtaining connection settings from one of the input tables."""
            """ This plugin only supports Teradata tables.""")

    # Actual query.
    query = getFunctionsQuery(dss_function, inputTables, outputTable,
                              get_recipe_config() or {})

    executor = SQLExecutor2(dataset=input_dataset)
    if dss_function.get('dropIfExists', False):
        dropTableStr = dropTableStatement(outputTable)
        if requiresTransactions:
            print('Start transaction for drop')
            print(stTxn)
            executor.query_to_df(stTxn)
            print('End transaction for drop')
            print(edTxn)
            executor.query_to_df(edTxn, [dropTableStr])
        else:
            executor.query_to_df(dropTableStr)

        # Drop each additional output table separately.
        dropAllQuery = getDropOutputTableArgumentsStatements(
            dss_function.get('output_tables', []))
        print('Drop ALL Query:')
        print(dropAllQuery)
        if requiresTransactions:
            for dropTblStmt in dropAllQuery:
                print('Start transaction for drop')
                print(stTxn)
                executor.query_to_df(stTxn)
                print('End transaction for drop')
                print(edTxn)
                executor.query_to_df(edTxn, [dropTblStmt])
        else:
            for dropTblStmt in dropAllQuery:
                executor.query_to_df(dropTblStmt)

    print('\n'.join(query))

    if requiresTransactions:
        print('Start transaction for create table')
        print(stTxn)
        executor.query_to_df(stTxn)

    # Detect error.
    try:
        selectResult = executor.query_to_df(query)
    except Exception as error:
        err_str = str(error)
        err_str_list = err_str.split(" ")
        # Shorten the error for the modal in the front-end.
        if len(err_str_list) > 15:
            print("\n=================================\n")
            print(error)
            print("\n=================================\n")
            new_err_str = err_str_list[:15]
            new_err_str.append("\n\n")
            new_err_str.append("...")
            new_err_str = " ".join(new_err_str)
            raise RuntimeError(new_err_str)
        else:
            raise RuntimeError(err_str)

    if requiresTransactions:
        print('End transaction for create table')
        print(edTxn)
        executor.query_to_df(edTxn)

    print('Moving results to output...')
    if requiresTransactions:
        print('Start transaction for schema building')
        print(stTxn)
        executor.query_to_df(stTxn)

    customOutputTableSQL = 'SELECT * from ' + outputTable.tablename + \
                           ' SAMPLE 0'
    selRes = executor.query_to_df(customOutputTableSQL)
    pythonrecipe_out = output_dataset
    pythonrecipe_out.write_schema_from_dataframe(selRes)

    # Additional output tables.
    outtables = dss_function.get('output_tables', [])
    if outtables != []:
        tableCounter = 1
        print('Working on output tables')
        print(get_output_names_for_role('main'))
        print(outtables)
        for table in outtables:
            if table.get('value') != '' and table.get('value') is not None:
                print('Table')
                print(table)
                # Match on the last dot-separated component so that table
                # names with or without a database prefix resolve to the
                # right output dataset. (Change below might fix issue 4 of
                # Jan 4 2018 for Nico, for non-drop tables.)
                try:
                    main_output_name2 = list(
                        filter(
                            lambda out_dataset: out_dataset.split('.')[-1] ==
                            table.get('value').split('.')[-1].strip('"'),
                            get_output_names_for_role('main')))[0]
                except Exception as error:
                    raise RuntimeError(
                        'Unable to find an output dataset for ' +
                        table.get('value') +
                        '. It may not exist as an output dataset: ' +
                        table.get('value') + "\nRuntime Error: " +
                        str(error))
                print('Output name 2')
                print(main_output_name2)
                output_dataset2 = dataiku.Dataset(main_output_name2)
                tableNamePrefix = output_dataset2.get_location_info(
                    sensitive_info=True)['info'].get('connectionParams').get(
                        'namingRule').get('tableNameDatasetNamePrefix')
                if tableNamePrefix is not None and tableNamePrefix != '':
                    print('Table prefix is not empty: ' + tableNamePrefix)
                customOutputTableSQL = 'SELECT * from ' + table.get(
                    'value') + ' SAMPLE 0'
                print('Working on table number:')
                print(tableCounter)
                print(customOutputTableSQL)
                selRes = executor.query_to_df(customOutputTableSQL)
                tableCounter += 1
                pythonrecipe_out2 = output_dataset2
                pythonrecipe_out2.write_schema_from_dataframe(selRes)

    if requiresTransactions:
        print('End transaction for schema building')
        print(edTxn)
        executor.query_to_df(edTxn)
    print('Complete!')
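# The lambda used to resolve output datasets above is dense; a hypothetical
# standalone equivalent (not part of the plugin) makes the matching rule
# explicit and is easier to unit-test:
def find_output_dataset(table_value, output_names):
    """Match a (possibly schema-qualified, possibly quoted) table name to a
    DSS output dataset by comparing the last dot-separated component."""
    wanted = table_value.split('.')[-1].strip('"')
    for name in output_names:
        if name.split('.')[-1] == wanted:
            return name
    raise RuntimeError('Unable to find an output dataset for ' + table_value)

# Example: find_output_dataset('dss."my_table"', ['PROJ.my_table'])
# returns 'PROJ.my_table'.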
def createTable(self, config, metadata, mode='input'):
    if mode == 'output':
        return outputtableinfo(config, config.get('dataset', ""), metadata)
    return inputtableinfo(config, config.get('table', ""), metadata)
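# Example usage (hypothetical configs, mirroring the test fixtures above):
#
#   in_table = self.createTable(
#       {'table': 'ner_sports_test', 'schema': 'dss'}, nerConfig)
#   out_table = self.createTable(
#       {'dataset': 'dss_ner', 'schema': 'dss'}, nerConfig, mode='output')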