def test_get_input_table_by_name_existing_passes(self):
    """Look up the 'fooBar' input table by name and check its id, path and name."""
    interface = CommonInterface()
    table_def = interface.get_input_table_definition_by_name('fooBar')

    self.assertEqual(table_def.id, 'in.c-main.test2')

    # The full path is expected to sit directly under the input tables folder.
    expected_path = os.path.join(interface.tables_in_path, 'fooBar')
    self.assertEqual(table_def.full_path, expected_path)

    self.assertEqual(table_def.name, 'fooBar')
    def test_get_input_files_definition_latest(self):
        """Call get_input_files_definitions() with defaults and check the result.

        Expects five file definitions; any definition named
        'duty_calls.png' must carry the id '151971455'.
        """
        interface = CommonInterface()

        file_defs = interface.get_input_files_definitions()

        self.assertEqual(len(file_defs), 5)
        # NOTE(review): passes vacuously when no definition has this name.
        for file_def in file_defs:
            if file_def.name != 'duty_calls.png':
                continue
            self.assertEqual(file_def.id, '151971455')
    def test_get_input_files_definition_nofilter(self):
        """Request file definitions with only_latest_files=False.

        Expects six file definitions; any definition named 'duty_calls'
        must have the expected tags, max age and size.
        """
        interface = CommonInterface()

        file_defs = interface.get_input_files_definitions(
            only_latest_files=False)

        self.assertEqual(len(file_defs), 6)
        # NOTE(review): passes vacuously when no definition is named
        # 'duty_calls' (a sibling test matches 'duty_calls.png') - confirm.
        for file_def in file_defs:
            if file_def.name != 'duty_calls':
                continue
            self.assertEqual(file_def.tags, ["xkcd"])
            self.assertEqual(file_def.max_age_days, 180)
            self.assertEqual(file_def.size_bytes, 30027)
    def test_get_input_files_definition_by_tag(self):
        """Request file definitions filtered by the 'dilbert' tag.

        Expects three file definitions; any definition named
        '21702.strip.print.gif' must have the expected tags, max age and size.
        """
        interface = CommonInterface()

        file_defs = interface.get_input_files_definitions(tags=['dilbert'])

        self.assertEqual(len(file_defs), 3)
        # NOTE(review): passes vacuously when no definition has this name.
        for file_def in file_defs:
            if file_def.name != '21702.strip.print.gif':
                continue
            self.assertEqual(file_def.tags, ["dilbert"])
            self.assertEqual(file_def.max_age_days, 180)
            self.assertEqual(file_def.size_bytes, 4931)
    def test_get_input_files_definition_no_manifest_passes(self):
        """Load file definitions from the 'data_examples/data2' folder.

        Expects exactly one definition whose max age and size are 0 and
        whose creation timestamp is None.
        """
        tests_dir = os.path.dirname(os.path.realpath(__file__))
        data_dir = os.path.join(tests_dir, 'data_examples', 'data2')
        interface = CommonInterface(data_dir)

        file_defs = interface.get_input_files_definitions(
            only_latest_files=True)

        self.assertEqual(len(file_defs), 1)
        for file_def in file_defs:
            self.assertEqual(file_def.max_age_days, 0)
            self.assertEqual(file_def.size_bytes, 0)
            self.assertEqual(file_def.created, None)
    def test_state_file_created(self):
        """Write a state file and verify its JSON content on disk.

        The written 'out/state.json' is removed even when loading or the
        assertion fails, so a failing run does not leave the file behind
        for later tests.
        """
        ci = CommonInterface()
        # write
        ci.write_state_file({"some_state": 1234})

        state_filename = os.path.join(ci.data_folder_path, 'out', 'state.json')
        try:
            # load
            with open(state_filename) as state_file:
                state = json.load(state_file)

            self.assertEqual({"some_state": 1234}, state)
        finally:
            # cleanup; guarded so a missing file does not mask the real failure
            if os.path.exists(state_filename):
                os.remove(state_filename)
    def test_all_env_variables_initialized(self):
        """Set every KBC_* variable and verify CommonInterface exposes each one.

        Each variable is set to its own name so the assertions can tell the
        values apart. The environment changes are scoped with
        ``mock.patch.dict`` and rolled back when the test ends, so they do
        not leak into other tests.
        """
        from unittest import mock

        variable_names = [
            'KBC_RUNID',
            'KBC_PROJECTID',
            'KBC_STACKID',
            'KBC_CONFIGID',
            'KBC_COMPONENTID',
            'KBC_PROJECTNAME',
            'KBC_TOKENID',
            'KBC_TOKENDESC',
            'KBC_TOKEN',
            'KBC_URL',
            'KBC_LOGGER_ADDR',
            'KBC_LOGGER_PORT',
        ]
        overrides = {name: name for name in variable_names}

        with mock.patch.dict(os.environ, overrides):
            ci = CommonInterface()
            # KBC_DATADIR is not overridden here; it must already be set.
            self.assertEqual(ci.environment_variables.data_dir,
                             os.environ["KBC_DATADIR"])
            self.assertEqual(ci.environment_variables.run_id, 'KBC_RUNID')
            self.assertEqual(ci.environment_variables.project_id,
                             'KBC_PROJECTID')
            self.assertEqual(ci.environment_variables.stack_id, 'KBC_STACKID')
            self.assertEqual(ci.environment_variables.config_id,
                             'KBC_CONFIGID')
            self.assertEqual(ci.environment_variables.component_id,
                             'KBC_COMPONENTID')
            self.assertEqual(ci.environment_variables.project_name,
                             'KBC_PROJECTNAME')
            self.assertEqual(ci.environment_variables.token_id, 'KBC_TOKENID')
            self.assertEqual(ci.environment_variables.token_desc,
                             'KBC_TOKENDESC')
            self.assertEqual(ci.environment_variables.token, 'KBC_TOKEN')
            self.assertEqual(ci.environment_variables.url, 'KBC_URL')
            self.assertEqual(ci.environment_variables.logger_addr,
                             'KBC_LOGGER_ADDR')
            self.assertEqual(ci.environment_variables.logger_port,
                             'KBC_LOGGER_PORT')
 def test_missing_config(self):
     """Expect a ValueError when the data directory has no config.json.

     KBC_DATADIR is pointed at the 'data_examples' folder only for the
     duration of the test; the previous value is restored afterwards so
     later tests are not affected.
     """
     from unittest import mock

     data_dir = os.path.join(
         os.path.dirname(os.path.realpath(__file__)), 'data_examples')
     with mock.patch.dict(os.environ, {"KBC_DATADIR": data_dir}):
         with self.assertRaisesRegex(
                 ValueError, "Configuration file config.json not found"):
             # Both statements stay inside the context: either one may be
             # the point where the missing configuration is detected.
             ci = CommonInterface()
             _ = ci.configuration
    def test_get_input_files_definition_tag_group_w_system(self):
        """Group file definitions from 'data_system_tags' by tag group.

        Expects two groups, with three definitions under the "bar;foo"
        key; any definition named 'compiler_complaint.png' in that group
        must carry the full expected tag list.
        """
        tests_dir = os.path.dirname(os.path.realpath(__file__))
        data_dir = os.path.join(tests_dir, 'data_examples', 'data_system_tags')
        interface = CommonInterface(data_dir)

        grouped = interface.get_input_file_definitions_grouped_by_tag_group(
            only_latest_files=False)

        self.assertEqual(len(grouped), 2)
        self.assertEqual(len(grouped["bar;foo"]), 3)

        expected_tags = [
            "foo", "bar", "componentId: 1234",
            "configurationId: 12345", "configurationRowId: 12345",
            "runId: 22123", "branchId: 312321"
        ]
        # NOTE(review): passes vacuously when no definition has this name.
        for file_def in grouped["bar;foo"]:
            if file_def.name != 'compiler_complaint.png':
                continue
            self.assertEqual(file_def.tags, expected_tags)
    def test_get_input_files_definition_by_tag_w_system(self):
        """Filter file definitions from 'data_system_tags' by the 'dilbert' tag.

        Expects three definitions; any definition named
        '21702.strip.print.gif' must have the expected tag list, max age
        and size.
        """
        tests_dir = os.path.dirname(os.path.realpath(__file__))
        data_dir = os.path.join(tests_dir, 'data_examples', 'data_system_tags')
        interface = CommonInterface(data_dir)

        file_defs = interface.get_input_files_definitions(tags=['dilbert'])

        self.assertEqual(len(file_defs), 3)

        expected_tags = [
            "dilbert", "componentId: 1234", "configurationId: 12345",
            "configurationRowId: 12345", "runId: 22123",
            "branchId: 312321"
        ]
        # NOTE(review): passes vacuously when no definition has this name.
        for file_def in file_defs:
            if file_def.name != '21702.strip.print.gif':
                continue
            self.assertEqual(file_def.tags, expected_tags)
            self.assertEqual(file_def.max_age_days, 180)
            self.assertEqual(file_def.size_bytes, 4931)
    def test_create_and_write_table_manifest_multi(self):
        """Write a table manifest via write_manifests() and verify its JSON.

        Builds an output table definition with columns, destination,
        primary key, incremental flag, a delete-where clause and both
        table-level and column-level metadata, then compares the written
        '.manifest' file with the expected dictionary. The manifest file
        is removed even when the comparison fails.
        """
        ci = CommonInterface()
        # create table def
        out_table = ci.create_out_table_definition(
            'some-table.csv',
            columns=['foo', 'bar'],
            destination='some-destination',
            primary_key=['foo'],
            incremental=True,
            delete_where={
                'column': 'lilly',
                'values': ['a', 'b'],
                'operator': 'eq'
            })
        out_table.table_metadata.add_table_metadata('bar', 'kochba')
        out_table.table_metadata.add_column_metadata('bar', 'foo', 'gogo')

        # write
        ci.write_manifests([out_table])
        manifest_filename = out_table.full_path + '.manifest'
        try:
            with open(manifest_filename) as manifest_file:
                config = json.load(manifest_file)
            self.assertEqual(
                {
                    'destination': 'some-destination',
                    'columns': ['foo', 'bar'],
                    'primary_key': ['foo'],
                    'incremental': True,
                    'metadata': [{
                        'key': 'bar',
                        'value': 'kochba'
                    }],
                    'delimiter': ',',
                    'enclosure': '"',
                    'column_metadata': {
                        'bar': [{
                            'key': 'foo',
                            'value': 'gogo'
                        }]
                    },
                    'delete_where_column': 'lilly',
                    'delete_where_values': ['a', 'b'],
                    'delete_where_operator': 'eq'
                }, config)
        finally:
            # cleanup; guarded so a missing file does not mask the real failure
            if os.path.exists(manifest_filename):
                os.remove(manifest_filename)
    def test_get_input_tables_definition_orphaned_manifest(self):
        """Request input table definitions with orphaned_manifests=True.

        Expects five definitions. A definition named 'sample.csv' must
        have the expected columns, row count and data size; one named
        'fooBar' must have the expected id, full path and name.
        """
        interface = CommonInterface()

        table_defs = interface.get_input_tables_definitions(
            orphaned_manifests=True)

        self.assertEqual(len(table_defs), 5)

        sample_columns = [
            "x", "Sales", "CompPrice", "Income", "Advertising",
            "Population", "Price", "ShelveLoc", "Age", "Education",
            "Urban", "US", "High"
        ]
        for table_def in table_defs:
            if table_def.name == 'sample.csv':
                self.assertEqual(table_def.columns, sample_columns)
                self.assertEqual(table_def.rows_count, 400)
                self.assertEqual(table_def.data_size_bytes, 81920)
                continue

            if table_def.name == 'fooBar':
                self.assertEqual(table_def.id, 'in.c-main.test2')
                expected_path = os.path.join(interface.tables_in_path,
                                             'fooBar')
                self.assertEqual(table_def.full_path, expected_path)
                self.assertEqual(table_def.name, 'fooBar')
    def test_create_and_write_file_manifest(self):
        """Write a file manifest via write_manifest() and verify its JSON.

        Builds an output file definition with tags and the permanent,
        encrypted, public and notify flags set, then compares the written
        '.manifest' file with the expected dictionary. The manifest file
        is removed even when the comparison fails.
        """
        ci = CommonInterface()
        # create file def
        out_file = ci.create_out_file_definition('some-file.jpg',
                                                 is_permanent=True,
                                                 is_encrypted=True,
                                                 is_public=True,
                                                 tags=['foo', 'bar'],
                                                 notify=True)

        # write
        ci.write_manifest(out_file)
        manifest_filename = out_file.full_path + '.manifest'
        try:
            with open(manifest_filename) as manifest_file:
                config = json.load(manifest_file)
            self.assertEqual(
                {
                    'tags': ['foo', 'bar'],
                    'is_public': True,
                    'is_permanent': True,
                    'is_encrypted': True,
                    'notify': True
                }, config)
        finally:
            # cleanup; guarded so a missing file does not mask the real failure
            if os.path.exists(manifest_filename):
                os.remove(manifest_filename)
# Example #14
# 0
import csv
import os

# Load the Component library to process the config file
from keboola.component import CommonInterface

# Example component script: reads the 'bitcoin-price.csv' input table and
# prepares CSV writers in the output tables folder.
#
# Rely on the KBC_DATADIR environment variable by default,
# alternatively provide a data folder path in the constructor (CommonInterface('data'))
ci = CommonInterface()
# NOTE(review): `params` is not used in the visible part of this script.
params = ci.configuration.parameters

print("Hello world from python")

# CSV dialect shared by the reader and the writer(s).
csvlt = '\n'
csvdel = ','
csvquo = '"'

# get input table definition by name
in_table = ci.get_input_table_definition_by_name('bitcoin-price.csv')

odd_path = os.path.join(ci.tables_out_path, 'odd.csv')
even_path = os.path.join(ci.tables_out_path, 'even.csv')

# newline='' is what the csv module documentation asks for on files handed to
# csv readers/writers: it disables newline translation so embedded newlines in
# quoted fields survive and the writer's lineterminator is emitted verbatim.
with open(in_table.full_path, mode='rt', encoding='utf-8', newline='') as in_file, \
        open(odd_path, mode='wt', encoding='utf-8', newline='') as odd_file, \
        open(even_path, mode='wt', encoding='utf-8', newline='') as even_file:
    # Strip NUL characters lazily, line by line, before csv sees the data.
    lazy_lines = (line.replace('\0', '') for line in in_file)
    reader = csv.DictReader(lazy_lines, lineterminator=csvlt, delimiter=csvdel,
                            quotechar=csvquo)

    # Accessing reader.fieldnames reads the header row from the input.
    odd_writer = csv.DictWriter(odd_file, fieldnames=reader.fieldnames,
                                lineterminator=csvlt, delimiter=csvdel,
                                quotechar=csvquo)
    odd_writer.writeheader()
    # NOTE(review): even_file is opened but nothing is written to it in the
    # visible part of this script - the snippet looks truncated; confirm.
 def test_get_tables_in_dir(self):
     """Check files_in_path equals '<KBC_DATADIR>/in/files'.

     NOTE(review): despite the method name, the assertion targets the
     input *files* folder, not the tables folder - confirm intent.
     """
     interface = CommonInterface()
     data_dir = os.getenv('KBC_DATADIR', '')
     expected_files_in = os.path.join(data_dir, 'in', 'files')
     self.assertEqual(expected_files_in, interface.files_in_path)
 def test_get_data_dir(self):
     """Check data_folder_path equals the KBC_DATADIR environment variable."""
     interface = CommonInterface()
     expected_data_dir = os.getenv('KBC_DATADIR', '')
     self.assertEqual(expected_data_dir, interface.data_folder_path)
 def test_missing_dir(self):
     """Expect a ValueError when KBC_DATADIR points to a missing directory.

     The bogus KBC_DATADIR value is applied only for the duration of the
     test; the previous value is restored afterwards so later tests are
     not affected.
     """
     from unittest import mock

     with mock.patch.dict(os.environ, {"KBC_DATADIR": "asdf"}):
         with self.assertRaisesRegex(ValueError,
                                     "The data directory does not exist"):
             CommonInterface()
 def test_state_file_initialized(self):
     """Check the loaded state file maps 'test_state' to 1234."""
     interface = CommonInterface()
     loaded_state = interface.get_state_file()
     actual_value = loaded_state['test_state']
     self.assertEqual(actual_value, 1234)
 def test_get_input_table_by_name_fails_on_nonexistent(self):
     """Expect a ValueError when looking up the table 'nonexistent.csv'."""
     interface = CommonInterface()
     missing_name = 'nonexistent.csv'
     with self.assertRaises(ValueError):
         interface.get_input_table_definition_by_name(missing_name)