def test_get_input_table_by_name_existing_passes(self):
    """Look up the 'fooBar' input table by name and verify its id, path and name."""
    interface = CommonInterface()
    table_def = interface.get_input_table_definition_by_name('fooBar')
    expected_path = os.path.join(interface.tables_in_path, 'fooBar')

    self.assertEqual(table_def.id, 'in.c-main.test2')
    self.assertEqual(table_def.full_path, expected_path)
    self.assertEqual(table_def.name, 'fooBar')
def test_get_input_files_definition_latest(self):
    """With default arguments, five input file definitions are returned.

    Every returned file named 'duty_calls.png' must carry the id '151971455'.
    """
    interface = CommonInterface()
    file_defs = interface.get_input_files_definitions()

    self.assertEqual(len(file_defs), 5)
    # NOTE(review): the id check only runs for entries with this exact name;
    # if no entry matches, it is silently skipped.
    for file_def in file_defs:
        if file_def.name != 'duty_calls.png':
            continue
        self.assertEqual(file_def.id, '151971455')
def test_get_input_files_definition_nofilter(self):
    """With only_latest_files=False, six input file definitions are returned.

    Every returned file named 'duty_calls' is checked for its tags, maximum
    age and size.
    """
    interface = CommonInterface()
    file_defs = interface.get_input_files_definitions(only_latest_files=False)

    self.assertEqual(len(file_defs), 6)
    # NOTE(review): the attribute checks only run for entries with this exact
    # name; if no entry matches, they are silently skipped.
    for file_def in file_defs:
        if file_def.name != 'duty_calls':
            continue
        self.assertEqual(file_def.tags, ["xkcd"])
        self.assertEqual(file_def.max_age_days, 180)
        self.assertEqual(file_def.size_bytes, 30027)
def test_get_input_files_definition_by_tag(self):
    """Filtering input files by the 'dilbert' tag yields three definitions.

    Every returned file named '21702.strip.print.gif' is checked for its tags,
    maximum age and size.
    """
    interface = CommonInterface()
    file_defs = interface.get_input_files_definitions(tags=['dilbert'])

    self.assertEqual(len(file_defs), 3)
    # NOTE(review): the attribute checks only run for entries with this exact
    # name; if no entry matches, they are silently skipped.
    for file_def in file_defs:
        if file_def.name != '21702.strip.print.gif':
            continue
        self.assertEqual(file_def.tags, ["dilbert"])
        self.assertEqual(file_def.max_age_days, 180)
        self.assertEqual(file_def.size_bytes, 4931)
def test_get_input_files_definition_no_manifest_passes(self):
    """Input files of the 'data2' example resolve to a single definition.

    Every returned definition is expected to report max_age_days == 0,
    size_bytes == 0 and created == None.
    """
    tests_dir = os.path.dirname(os.path.realpath(__file__))
    data_dir = os.path.join(tests_dir, 'data_examples', 'data2')
    interface = CommonInterface(data_dir)

    file_defs = interface.get_input_files_definitions(only_latest_files=True)

    self.assertEqual(len(file_defs), 1)
    for file_def in file_defs:
        self.assertEqual(file_def.max_age_days, 0)
        self.assertEqual(file_def.size_bytes, 0)
        self.assertEqual(file_def.created, None)
def test_state_file_created(self):
    """Write a state dict and verify it round-trips through out/state.json.

    The state file is removed afterwards even when loading or the assertion
    fails, so a failing run does not leave the file behind in the data folder.
    """
    ci = CommonInterface()
    state_filename = os.path.join(ci.data_folder_path, 'out', 'state.json')

    try:
        # write
        ci.write_state_file({"some_state": 1234})

        # load
        with open(state_filename) as state_file:
            state = json.load(state_file)

        self.assertEqual({"some_state": 1234}, state)
    finally:
        # cleanup; guarded so a failed write does not mask the real error
        # with a FileNotFoundError from os.remove
        if os.path.exists(state_filename):
            os.remove(state_filename)
def test_all_env_variables_initialized(self):
    """Each KBC_* environment variable is exposed on ci.environment_variables.

    The variables are set through mock.patch.dict so that os.environ is
    restored when the test finishes; assigning to os.environ directly would
    leak the values into every test that runs afterwards.
    """
    from unittest import mock

    # attribute on ci.environment_variables -> environment variable name;
    # each variable is set to its own name so the expected value is obvious
    attr_to_env = {
        'run_id': 'KBC_RUNID',
        'project_id': 'KBC_PROJECTID',
        'stack_id': 'KBC_STACKID',
        'config_id': 'KBC_CONFIGID',
        'component_id': 'KBC_COMPONENTID',
        'project_name': 'KBC_PROJECTNAME',
        'token_id': 'KBC_TOKENID',
        'token_desc': 'KBC_TOKENDESC',
        'token': 'KBC_TOKEN',
        'url': 'KBC_URL',
        'logger_addr': 'KBC_LOGGER_ADDR',
        'logger_port': 'KBC_LOGGER_PORT',
    }
    overrides = {env_name: env_name for env_name in attr_to_env.values()}

    # set all variables (restored on exit from the block)
    with mock.patch.dict(os.environ, overrides):
        ci = CommonInterface()

        # KBC_DATADIR is not overridden here; it must already be set
        self.assertEqual(ci.environment_variables.data_dir,
                         os.environ["KBC_DATADIR"])
        for attr, env_name in attr_to_env.items():
            self.assertEqual(getattr(ci.environment_variables, attr),
                             env_name, msg=attr)
def test_missing_config(self):
    """A data directory without config.json raises ValueError.

    KBC_DATADIR is pointed at the 'data_examples' folder only for the
    duration of the test (via mock.patch.dict), so the override does not
    leak into other tests.
    """
    from unittest import mock

    data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            'data_examples')

    with mock.patch.dict(os.environ, {"KBC_DATADIR": data_dir}):
        with self.assertRaisesRegex(
                ValueError, "Configuration file config.json not found"):
            ci = CommonInterface()
            # Accessed only to trigger the error; the value itself is unused.
            _ = ci.configuration
def test_get_input_files_definition_tag_group_w_system(self):
    """Group input files of the 'data_system_tags' example by tag group.

    Two groups are expected; the 'bar;foo' group holds three files, and any
    file in it named 'compiler_complaint.png' is checked for its full tag list.
    """
    tests_dir = os.path.dirname(os.path.realpath(__file__))
    data_dir = os.path.join(tests_dir, 'data_examples', 'data_system_tags')
    interface = CommonInterface(data_dir)

    grouped = interface.get_input_file_definitions_grouped_by_tag_group(
        only_latest_files=False)

    self.assertEqual(len(grouped), 2)
    self.assertEqual(len(grouped["bar;foo"]), 3)

    expected_tags = [
        "foo",
        "bar",
        "componentId: 1234",
        "configurationId: 12345",
        "configurationRowId: 12345",
        "runId: 22123",
        "branchId: 312321",
    ]
    # NOTE(review): the tag check only runs for entries with this exact name;
    # if no entry matches, it is silently skipped.
    for file_def in grouped["bar;foo"]:
        if file_def.name != 'compiler_complaint.png':
            continue
        self.assertEqual(file_def.tags, expected_tags)
def test_get_input_files_definition_by_tag_w_system(self):
    """Filter input files of the 'data_system_tags' example by tag 'dilbert'.

    Three definitions are expected; any file named '21702.strip.print.gif' is
    checked for its full tag list, maximum age and size.
    """
    tests_dir = os.path.dirname(os.path.realpath(__file__))
    data_dir = os.path.join(tests_dir, 'data_examples', 'data_system_tags')
    interface = CommonInterface(data_dir)

    file_defs = interface.get_input_files_definitions(tags=['dilbert'])

    self.assertEqual(len(file_defs), 3)

    expected_tags = [
        "dilbert",
        "componentId: 1234",
        "configurationId: 12345",
        "configurationRowId: 12345",
        "runId: 22123",
        "branchId: 312321",
    ]
    # NOTE(review): the attribute checks only run for entries with this exact
    # name; if no entry matches, they are silently skipped.
    for file_def in file_defs:
        if file_def.name != '21702.strip.print.gif':
            continue
        self.assertEqual(file_def.tags, expected_tags)
        self.assertEqual(file_def.max_age_days, 180)
        self.assertEqual(file_def.size_bytes, 4931)
def test_create_and_write_table_manifest_multi(self):
    """Write a table manifest via write_manifests() and verify its JSON.

    The manifest file is removed afterwards even when loading or the
    assertion fails, so a failing run does not leave the file behind.
    """
    ci = CommonInterface()

    # create table def
    out_table = ci.create_out_table_definition(
        'some-table.csv',
        columns=['foo', 'bar'],
        destination='some-destination',
        primary_key=['foo'],
        incremental=True,
        delete_where={
            'column': 'lilly',
            'values': ['a', 'b'],
            'operator': 'eq'
        })
    out_table.table_metadata.add_table_metadata('bar', 'kochba')
    out_table.table_metadata.add_column_metadata('bar', 'foo', 'gogo')

    manifest_filename = out_table.full_path + '.manifest'
    expected = {
        'destination': 'some-destination',
        'columns': ['foo', 'bar'],
        'primary_key': ['foo'],
        'incremental': True,
        'metadata': [{
            'key': 'bar',
            'value': 'kochba'
        }],
        'delimiter': ',',
        'enclosure': '"',
        'column_metadata': {
            'bar': [{
                'key': 'foo',
                'value': 'gogo'
            }]
        },
        'delete_where_column': 'lilly',
        'delete_where_values': ['a', 'b'],
        'delete_where_operator': 'eq'
    }

    try:
        # write
        ci.write_manifests([out_table])

        with open(manifest_filename) as manifest_file:
            config = json.load(manifest_file)

        self.assertEqual(expected, config)
    finally:
        # cleanup; guarded so a failed write does not mask the real error
        # with a FileNotFoundError from os.remove
        if os.path.exists(manifest_filename):
            os.remove(manifest_filename)
def test_get_input_tables_definition_orphaned_manifest(self):
    """With orphaned_manifests=True, five input table definitions are returned.

    Entries named 'sample.csv' are checked for columns, row count and size;
    entries named 'fooBar' are checked for id, full path and name.
    """
    interface = CommonInterface()
    table_defs = interface.get_input_tables_definitions(orphaned_manifests=True)

    self.assertEqual(len(table_defs), 5)

    sample_columns = [
        "x", "Sales", "CompPrice", "Income", "Advertising", "Population",
        "Price", "ShelveLoc", "Age", "Education", "Urban", "US", "High",
    ]
    # NOTE(review): entries with any other name are not checked at all, and
    # the checks below are skipped if neither name is present.
    for table_def in table_defs:
        if table_def.name == 'sample.csv':
            self.assertEqual(table_def.columns, sample_columns)
            self.assertEqual(table_def.rows_count, 400)
            self.assertEqual(table_def.data_size_bytes, 81920)
            continue

        if table_def.name == 'fooBar':
            self.assertEqual(table_def.id, 'in.c-main.test2')
            self.assertEqual(table_def.full_path,
                             os.path.join(interface.tables_in_path, 'fooBar'))
            self.assertEqual(table_def.name, 'fooBar')
def test_create_and_write_file_manifest(self):
    """Write a file manifest via write_manifest() and verify its JSON.

    The manifest file is removed afterwards even when loading or the
    assertion fails, so a failing run does not leave the file behind.
    """
    ci = CommonInterface()

    # create file def
    out_file = ci.create_out_file_definition('some-file.jpg',
                                             is_permanent=True,
                                             is_encrypted=True,
                                             is_public=True,
                                             tags=['foo', 'bar'],
                                             notify=True)

    manifest_filename = out_file.full_path + '.manifest'
    expected = {
        'tags': ['foo', 'bar'],
        'is_public': True,
        'is_permanent': True,
        'is_encrypted': True,
        'notify': True
    }

    try:
        # write
        ci.write_manifest(out_file)

        with open(manifest_filename) as manifest_file:
            config = json.load(manifest_file)

        self.assertEqual(expected, config)
    finally:
        # cleanup; guarded so a failed write does not mask the real error
        # with a FileNotFoundError from os.remove
        if os.path.exists(manifest_filename):
            os.remove(manifest_filename)
# Example script: reads the 'bitcoin-price.csv' input table and opens the
# 'odd.csv' and 'even.csv' output tables, writing the input header to
# 'odd.csv'.
# NOTE(review): the visible portion ends right after the header is written;
# no rows are copied and `even_file` is not used here. The script presumably
# continues beyond this point; confirm against the full file.
import csv
import os

# Load the Component library to process the config file
from keboola.component import CommonInterface

# Rely on the KBC_DATADIR environment variable by default,
# alternatively provide a data folder path in the constructor (CommonInterface('data'))
ci = CommonInterface()
# NOTE(review): `params` is not referenced in the visible portion of the script.
params = ci.configuration.parameters

print("Hello world from python")

# CSV dialect settings shared by the reader and the writer below:
# line terminator, field delimiter and quote character.
csvlt = '\n'
csvdel = ','
csvquo = '"'

# get input table definition by name
in_table = ci.get_input_table_definition_by_name('bitcoin-price.csv')

# Open the input table for reading and both output tables for writing;
# all three files are closed when the `with` block exits.
with open(in_table.full_path, mode='rt', encoding='utf-8') as in_file, \
        open(os.path.join(ci.tables_out_path, 'odd.csv'), mode='wt', encoding='utf-8') as odd_file, \
        open(os.path.join(ci.tables_out_path, 'even.csv'), mode='wt', encoding='utf-8') as even_file:
    # Lazily strip NUL characters from each input line before parsing
    # (presumably because the csv module can reject lines containing NUL; confirm).
    lazy_lines = (line.replace('\0', '') for line in in_file)
    # No fieldnames are given, so DictReader takes them from the first row.
    # NOTE(review): per the csv documentation the reader ignores
    # `lineterminator`; it only affects writers.
    reader = csv.DictReader(lazy_lines, lineterminator=csvlt, delimiter=csvdel, quotechar=csvquo)

    # The output reuses the input's column names as its header.
    odd_writer = csv.DictWriter(odd_file, fieldnames=reader.fieldnames,
                                lineterminator=csvlt, delimiter=csvdel, quotechar=csvquo)
    odd_writer.writeheader()
def test_get_tables_in_dir(self):
    """files_in_path equals '<KBC_DATADIR>/in/files'.

    Despite the test's name, the property under test is ci.files_in_path.
    """
    interface = CommonInterface()
    data_dir = os.getenv('KBC_DATADIR', '')
    expected_files_in = os.path.join(data_dir, 'in', 'files')

    self.assertEqual(expected_files_in, interface.files_in_path)
def test_get_data_dir(self):
    """data_folder_path equals the KBC_DATADIR environment variable ('' if unset)."""
    interface = CommonInterface()
    expected_dir = os.getenv('KBC_DATADIR', '')

    self.assertEqual(expected_dir, interface.data_folder_path)
def test_missing_dir(self):
    """Constructing CommonInterface with a bogus KBC_DATADIR raises ValueError.

    KBC_DATADIR is overridden only for the duration of the test (via
    mock.patch.dict), so the bogus value does not leak into other tests.
    """
    from unittest import mock

    with mock.patch.dict(os.environ, {"KBC_DATADIR": "asdf"}):
        with self.assertRaisesRegex(ValueError,
                                    "The data directory does not exist"):
            CommonInterface()
def test_state_file_initialized(self):
    """The loaded state file exposes 'test_state' with the value 1234."""
    interface = CommonInterface()
    loaded_state = interface.get_state_file()

    self.assertEqual(loaded_state['test_state'], 1234)
def test_get_input_table_by_name_fails_on_nonexistent(self):
    """Looking up the input table 'nonexistent.csv' raises ValueError."""
    interface = CommonInterface()

    # Construction stays outside the context manager so that only the lookup
    # itself is expected to raise.
    with self.assertRaises(ValueError):
        interface.get_input_table_definition_by_name('nonexistent.csv')