    def test_not_null_cols_must_be_a_foreign_key(self):
        # The assumption here is that the first column of the first table
        # isn't part of any foreign key.
        non_fk_col = self.schema_sl.tables[0].cols[0]
        fk_col = None

        # Let's check that assumption, just to be paranoid that the starting
        # conditions of this test are valid, and pick up a foreign key column
        # along the way.
        for table in self.schema_sl.tables:
            for fk in table.foreign_keys:
                for col in fk.src_cols:
                    assert non_fk_col != col
                    if fk_col is None:
                        fk_col = col

        assert fk_col is not None

        # Test for a valid foreign key
        data = [
            {'not-null-columns': [{'table': fk_col.table.name,
                                   'column': fk_col.name}]}]
        model = ExtractionModel.load(self.schema_sl, data)
        assert len(model.not_null_cols) == 1

        # Test for a non foreign key
        data = [
            {'not-null-columns': [{'table': non_fk_col.table.name,
                                   'column': non_fk_col.name}]}]
        with pytest.raises(RelationIntegrityError) as e:
            ExtractionModel.load(self.schema_sl, data)
        assert 'not-null-columns can only be used on foreign keys' in str(e)
    def test_toplevel_key_value(self):
        # Check bad None value for top level keys
        for key in ('subject', 'relations', 'not-null-columns'):
            with pytest.raises(ValidationError) as e:
                data = [{key: None}]
                ExtractionModel.load(self.schema_sl, data)
            assert 'is not of type' in str(e)
    def test_illegal_sticky_disabled_combination(self):
        for sticky in [True, False]:
            relation = dict(self.relations[0])
            relation['sticky'] = sticky
            relation['disabled'] = True
            data = [{'relations': [relation]}]
            with pytest.raises(InvalidConfigError) as e:
                ExtractionModel.load(self.schema_sl, data)
            assert 'The sticky flag is meaningless' in str(e)
Example #4
    def test_illegal_disabled_not_null_outgoing(self, schema1, data1):
        relation = {
            'table': 'test2',
            'column': 'test1_nn_id',
            'type': Relation.TYPE_OUTGOING,
            'disabled': True
        }
        data = [{'relations': [relation]}]
        with pytest.raises(RelationIntegrityError):
            ExtractionModel.load(schema1, data)
    def test_not_null_cols_must_be_toplevel(self):
        not_null_cols = [{'not-null-columns': []}]
        data = [{'relations': not_null_cols}]
        with pytest.raises(ValidationError) as e:
            ExtractionModel.load(self.schema_sl, data)
        assert 'Additional properties are not allowed' in str(e)

        data = [{'tables': not_null_cols}]
        with pytest.raises(ValidationError) as e:
            ExtractionModel.load(self.schema_sl, data)
        assert 'Additional properties are not allowed' in str(e)
    def test_non_existent_table_data(self):
        # Check unknown table
        data = [{'subject': [{'tables': [{'table': 'foo'}]}]}]
        with pytest.raises(UnknownTableError):
            ExtractionModel.load(self.schema_sl, data)

        # Check unknown column
        table_name = self.schema_sl.tables[0].name
        table = {'table': table_name, 'column': 'unknown', 'values': [1]}
        subject = [{'tables': [table]}]
        data = [{'subject': subject}]
        with pytest.raises(UnknownColumnError):
            ExtractionModel.load(self.schema_sl, data)
    def test_non_existent_not_null_cols_data(self):
        relation = dict(self.relations[0])
        table = relation['table']
        col = relation['column']

        data = [
            {'not-null-columns': [{'table': 'foo', 'column': col}]}]
        with pytest.raises(UnknownTableError):
            ExtractionModel.load(self.schema_sl, data)

        data = [
            {'not-null-columns': [{'table': table, 'column': 'unknown'}]}]
        with pytest.raises(UnknownColumnError):
            ExtractionModel.load(self.schema_sl, data)
    def test_relation_defaults_keys(self):
        # Test both None
        relation = dict(self.relations[0])
        table = relation.pop('table')
        data = [{'relations': [relation]}]
        with pytest.raises(InvalidConfigError) as e:
            ExtractionModel.load(self.schema_sl, data)
        assert 'Either defaults or table must be set' in str(e)

        # Test both set
        relation['table'] = table
        relation['defaults'] = Relation.DEFAULT_EVERYTHING
        with pytest.raises(InvalidConfigError) as e:
            ExtractionModel.load(self.schema_sl, data)
        assert 'Either defaults or table must be set' in str(e)
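    # The relation_defaults, expect_nullable and expect_incoming arguments of
    # the next test are presumably supplied by a pytest.mark.parametrize
    # decorator that is not part of this listing.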
    def test_relation_defaults(self, relation_defaults, expect_nullable,
                               expect_incoming):
        relations = []
        for relation_default in relation_defaults:
            relations.append({'defaults': relation_default})
        data = [{'relations': relations}]
        model = ExtractionModel.load(self.schema_sl, data)

        got_outgoing_not_null = False
        got_outgoing_nullable = False
        got_incoming = False
        got_outgoing = False

        relation_counts = defaultdict(int)
        for relation in model.relations:
            relation_counts[relation] += 1
            if relation_counts[relation] > 1:
                pytest.fail('Got duplicate relation "%s"' % relation)

            if relation.type == Relation.TYPE_OUTGOING:
                got_outgoing = True
                if relation.foreign_key.notnull:
                    got_outgoing_not_null = True
                else:
                    got_outgoing_nullable = True
            else:
                got_incoming = True

        assert got_outgoing is True
        assert got_outgoing_not_null is True
        assert expect_nullable == got_outgoing_nullable
        assert expect_incoming == got_incoming
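 # Fragment of a larger test: `self`, `col` and `self.relations` in the nested
 # helper below come from the enclosing test method, which is not shown here.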
 def test(values):
     table = {'table': self.schema_sl.tables[0].name, 'column': col,
              'values': values}
     subject = [{'relations': self.relations}, {'tables': [table]}]
     data = [{'subject': subject}]
     model = ExtractionModel.load(self.schema_sl, data)
     assert model.subjects[0].tables[0].values == values
Example #11
 def get_generator_instance(self,
                            schema,
                            not_null_cols=None,
                            table='test1'):
     if not_null_cols is None:
         not_null_cols = []
     extraction_model_data = [
         {
             'subject': [{
                 'tables': [{
                     'table': table
                 }]
             }]
         },
         {
             'relations': [{
                 'defaults': Relation.DEFAULT_EVERYTHING
             }]
         },
         {
             'not-null-columns': not_null_cols
         },
     ]
     extraction_model = ExtractionModel.load(schema, extraction_model_data)
     extractor = Extractor(self.database, extraction_model)
     return Generator(schema, extractor)
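     # A sketch of how the returned Generator is typically driven, mirroring
     # the process_toplevel_example() and main() examples later in this
     # listing; the call below is illustrative only:
     #
     #   generator = self.get_generator_instance(schema)
     #   generator.generate_statements()
     #   for (table, values) in generator.insert_statements:
     #       ...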
Example #12
    def test_two_tables_double_overlapping_subject(self, schema2, data2):
        extraction_model_data = [
            {
                'relations': [{
                    'table': 'test2',
                    'column': 'test1_id'
                }]
            },
            {
                'subject': [{
                    'tables': [{
                        'table': 'test1'
                    }]
                }]
            },
            {
                'subject': [{
                    'tables': [{
                        'table': 'test2'
                    }]
                }]
            },
        ]
        extraction_model = ExtractionModel.load(schema2, extraction_model_data)
        extractor = Extractor(self.database, extraction_model).launch()
        assert extractor.flat_results() == data2
    def test_subject_table_with_just_a_table_key(self):
        table = {'table': self.schema_sl.tables[0].name}
        subject = [{'relations': self.relations}, {'tables': [table]}]
        data = [{'subject': subject}]

        model = ExtractionModel.load(self.schema_sl, data)
        assert len(model.subjects[0].tables) == 1
        assert model.subjects[0].tables[0].table.name == table['table']
    def test_subject_table_column_and_values_keys_both_set(self):
        table = {'table': self.schema_sl.tables[0].name}
        col = table['column'] = self.schema_sl.tables[0].cols[0].name
        for key in ['column', 'values']:
            table = {'table': self.schema_sl.tables[0].name}
            if key == 'column':
                table['column'] = col
            if key == 'values':
                table['values'] = 1
            subject = [{'relations': self.relations}, {'tables': [table]}]
            data = [{'subject': subject}]
            with pytest.raises(InvalidConfigError) as e:
                ExtractionModel.load(self.schema_sl, data)
            if key == 'column':
                assert 'A table with a column must have values' in str(e)
            if key == 'values':
                assert 'A table with values must have a column' in str(e)
    def test_schema_sl_relations(self):
        data = [{'relations': self.relations}]
        model = ExtractionModel.load(self.schema_sl, data)
        assert model.relations[0].table.name == self.relations[0]['table']
        fk_col = model.relations[0].foreign_key.src_cols[0]
        assert fk_col.name == self.relations[0]['column']
        assert model.relations[0].name == self.relations[0]['name']
        assert repr(model.relations[0]) is not None
    def test_relation_keys(self):
        relation = dict(self.relations[0])

        # A missing name is ok
        relation = dict(self.relations[0])
        del relation['name']
        data = [{'relations': [relation]}]
        ExtractionModel.load(self.schema_sl, data)

        # A missing column is not ok
        relation = dict(self.relations[0])
        del relation['column']
        data = [{'relations': [relation]}]
        with pytest.raises(RelationIntegrityError) as e:
            ExtractionModel.load(self.schema_sl, data)
        assert 'Non default relations must have a column on table' in str(e)

        # A null name is ok
        relation = dict(self.relations[0])
        relation['name'] = None
        data = [{'relations': [relation]}]
        ExtractionModel.load(self.schema_sl, data)

        # Unknown key
        relation = dict(self.relations[0])
        relation['foo'] = 'bar'
        data = [{'relations': [relation]}]
        with pytest.raises(ValidationError):
            ExtractionModel.load(self.schema_sl, data)

        def check_bool(key, default_value, relation_key=None):
            if relation_key is None:
                relation_key = key

            # Test bool values
            for value in (True, False):
                relation = dict(self.relations[0])
                relation[key] = value
                data = [{'relations': [relation]}]
                model = ExtractionModel.load(self.schema_sl, data)
                assert getattr(model.relations[0], relation_key) is value

            # Test bad value exception
            relation[key] = 'foo'
            with pytest.raises(ValidationError):
                ExtractionModel.load(self.schema_sl, data)

            # Check default value
            relation = dict(self.relations[0])
            data = [{'relations': [relation]}]
            model = ExtractionModel.load(self.schema_sl, data)
            assert getattr(model.relations[0], relation_key) is default_value

        check_bool('disabled', False)
        check_bool('sticky', False, relation_key='propagate_sticky')
    def test_relation_type(self):
        # Check type
        relation = dict(self.relations[0])
        relation['type'] = 'bar'
        data = [{'relations': [relation]}]
        with pytest.raises(ValidationError):
            ExtractionModel.load(self.schema_sl, data)

        type_tests = [(Relation.TYPE_INCOMING, Relation.TYPE_INCOMING),
                      (Relation.TYPE_OUTGOING, Relation.TYPE_OUTGOING),
                      (None, Relation.TYPE_INCOMING)]
        for (value, expected_value) in type_tests:
            relation = dict(self.relations[0])
            data = [{'relations': [relation]}]
            if value is not None:
                relation['type'] = value
            model = ExtractionModel.load(self.schema_sl, data)
            assert model.relations[0].type == expected_value
Example #18
    def test_two_tables_double_overlapping_subject(self, schema2, data2):
        extraction_model_data = [
            {'relations': [{'table': 'test2', 'column': 'test1_id'}]},
            {'subject': [{'tables': [{'table': 'test1'}]}]},
            {'subject': [{'tables': [{'table': 'test2'}]}]},
        ]
        extraction_model = ExtractionModel.load(schema2, extraction_model_data)
        extractor = Extractor(self.database, extraction_model).launch()
        assert extractor.flat_results() == data2
    def model_of_table0(self):
        table = {'table': self.schema_sl.tables[0].name}

        subject = [
            {'relations': self.relations},
            {'tables': [table]}
        ]

        data = [{'subject': subject}]
        return ExtractionModel.load(self.schema_sl, data)
Example #20
 def test_results_row_str_and_repr(self, schema1, data1):
     extraction_model_data = [
         {'subject': [{'tables': [{'table': 'test1'}]}]},
     ]
     extraction_model = ExtractionModel.load(schema1, extraction_model_data)
     extractor = Extractor(self.database, extraction_model).launch()
     table1 = schema1.tables[0]
     result_rows = extractor.results[table1][table1.primary_key]
     for result_row in list(result_rows.values()):
         assert repr(result_row) is not None
Example #21
    def get_generator_instance(self, schema, not_null_cols=None,
                               table='test1'):
        if not_null_cols is None:
            not_null_cols = []
        extraction_model_data = [
            {'subject': [{'tables': [{'table': table}]}]},
            {'relations': [{'defaults': Relation.DEFAULT_EVERYTHING}]},
            {'not-null-columns': not_null_cols},
        ]
        extraction_model = ExtractionModel.load(schema, extraction_model_data)
        extractor = Extractor(self.database, extraction_model)
        return Generator(schema, extractor)
    def test_non_existent_relation_table_data(self):
        # Check unknown table
        relation = dict(self.relations[0])
        relation['table'] = 'foo'
        data = [{'relations': [relation]}]
        with pytest.raises(UnknownTableError):
            ExtractionModel.load(self.schema_sl, data)

        # Check unknown column
        relation = dict(self.relations[0])
        relation['column'] = 'unknown'
        data = [{'relations': [relation]}]
        with pytest.raises(UnknownColumnError):
            ExtractionModel.load(self.schema_sl, data)

        # Check known column, but it's not a foreign key
        known_relations = set()
        for rel in self.relations:
            known_relations.add((rel['table'], rel['column']))
        found_test = None
        for table in self.schema_sl.tables:
            for col in table.cols:
                if (table.name, col.name) not in known_relations:
                    found_test = (table.name, col.name)
                    break
        if found_test is None:
            raise Exception('Unable to find a test table/column')
        (table, col) = found_test

        relation = {'table': table, 'column': col}
        data = [{'relations': [relation]}]
        with pytest.raises(RelationIntegrityError):
            ExtractionModel.load(self.schema_sl, data)
        def check_bool(key, default_value, relation_key=None):
            if relation_key is None:
                relation_key = key

            # Test bool values
            for value in (True, False):
                relation = dict(self.relations[0])
                relation[key] = value
                data = [{'relations': [relation]}]
                model = ExtractionModel.load(self.schema_sl, data)
                assert getattr(model.relations[0], relation_key) is value

            # Test bad value exception
            relation[key] = 'foo'
            with pytest.raises(ValidationError):
                ExtractionModel.load(self.schema_sl, data)

            # Check default value
            relation = dict(self.relations[0])
            data = [{'relations': [relation]}]
            model = ExtractionModel.load(self.schema_sl, data)
            assert getattr(model.relations[0], relation_key) is default_value
    def test_toplevel_keys(self):
        # Test good cases
        not_null_cols = {'not-null-columns': []}
        relations = {'relations': self.relations}
        table = {'table': self.schema_sl.tables[0].name}
        subject = {'subject': [
            {'relations': self.relations},
            {'tables': [table]}
        ]}

        ExtractionModel.load(self.schema_sl, [not_null_cols])
        ExtractionModel.load(self.schema_sl, [relations])
        ExtractionModel.load(self.schema_sl, [subject])

        # Test exactly one of the keys has to be set
        with pytest.raises(InvalidConfigError) as e:
            ExtractionModel.load(self.schema_sl, [{}])
        assert 'Expected one key, got' in str(e)

        with pytest.raises(InvalidConfigError) as e:
            data = dict(not_null_cols)
            data.update(relations)
            ExtractionModel.load(self.schema_sl, [data])
        assert 'Expected one key, got' in str(e)
Example #25
 def test_results_row_str_and_repr(self, schema1, data1):
     extraction_model_data = [
         {
             'subject': [{
                 'tables': [{
                     'table': 'test1'
                 }]
             }]
         },
     ]
     extraction_model = ExtractionModel.load(schema1, extraction_model_data)
     extractor = Extractor(self.database, extraction_model).launch()
     table1 = schema1.tables[0]
     result_rows = extractor.results[table1][table1.primary_key]
     for result_row in list(result_rows.values()):
         assert repr(result_row) is not None
Example #26
    def test_sticky_combinations(self, schema1, data1, type_,
                                 notnull, sticky, propagate_sticky,
                                 only_if_sticky):
        col = 'test1_nn_id' if notnull else 'test1_id'
        relation = {'table': 'test2', 'column': col,
                    'type': type_, 'sticky': sticky}
        data = [{'relations': [relation]}]
        model = ExtractionModel.load(schema1, data)
        relation = model.relations[0]
        assert relation.propagate_sticky == propagate_sticky
        assert relation.only_if_sticky == only_if_sticky

        if type_ == Relation.TYPE_OUTGOING:
            assert len(merge_relations(model.relations)) == 2
        else:
            assert len(merge_relations(model.relations)) == 3
    def test_toplevel(self):
        with pytest.raises(ValidationError) as e:
            data = 'foo'
            ExtractionModel.load(self.schema_sl, data)
        assert 'is not of type' in str(e)

        with pytest.raises(ValidationError) as e:
            data = ['foo']
            ExtractionModel.load(self.schema_sl, data)
        assert 'is not of type' in str(e)

        with pytest.raises(ValidationError) as e:
            data = [{'foo': [], 'bar': []}]
            ExtractionModel.load(self.schema_sl, data)
        assert 'Additional properties are not allowed' in str(e)

        with pytest.raises(ValidationError) as e:
            data = [{'foo': 'bar'}]
            ExtractionModel.load(self.schema_sl, data)
        assert 'Additional properties are not allowed' in str(e)
Example #28
    def test_sticky_combinations(self, schema1, data1, type_, notnull, sticky,
                                 propagate_sticky, only_if_sticky):
        col = 'test1_nn_id' if notnull else 'test1_id'
        relation = {
            'table': 'test2',
            'column': col,
            'type': type_,
            'sticky': sticky
        }
        data = [{'relations': [relation]}]
        model = ExtractionModel.load(schema1, data)
        relation = model.relations[0]
        assert relation.propagate_sticky == propagate_sticky
        assert relation.only_if_sticky == only_if_sticky

        if type_ == Relation.TYPE_OUTGOING:
            assert len(merge_relations(model.relations)) == 2
        else:
            assert len(merge_relations(model.relations)) == 3
Example #29
    def check_launch(self, schema, extraction_model_data,
                     expected_data, global_relations=None,
                     expected_fetch_count=None, one_subject=True):
        if global_relations is not None:
            extraction_model_data.append({'relations': global_relations})

        extraction_model = ExtractionModel.load(schema, extraction_model_data)
        extractor = Extractor(self.database, extraction_model).launch()
        expected_data = sorted(expected_data, key=lambda t: t[0].name)

        if extractor.flat_results() != expected_data:
            print()
            print('Got results:')
            pprint(extractor.flat_results())
            print('Expected results:')
            pprint(expected_data)
        assert extractor.flat_results() == expected_data
        if expected_fetch_count is not None:
            assert extractor.fetch_count == expected_fetch_count
        return extractor
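    # A sketch of a typical check_launch() call; the schema1/data1 fixtures and
    # the extraction model data are assumptions modelled on the other tests in
    # this listing:
    #
    #   self.check_launch(
    #       schema1,
    #       [{'subject': [{'tables': [{'table': 'test1'}]}]}],
    #       data1)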
Example #30
    def check_launch(self,
                     schema,
                     extraction_model_data,
                     expected_data,
                     global_relations=None,
                     expected_fetch_count=None,
                     one_subject=True):
        if global_relations is not None:
            extraction_model_data.append({'relations': global_relations})

        extraction_model = ExtractionModel.load(schema, extraction_model_data)
        extractor = Extractor(self.database, extraction_model).launch()
        expected_data = sorted(expected_data, key=lambda t: t[0].name)

        if extractor.flat_results() != expected_data:
            print()
            print('Got results:')
            pprint(extractor.flat_results())
            print('Expected results:')
            pprint(expected_data)
        assert extractor.flat_results() == expected_data
        if expected_fetch_count is not None:
            assert extractor.fetch_count == expected_fetch_count
        return extractor
    def test_subject_keys(self):
        table = {'table': self.schema_sl.tables[0].name}
        subject = [
            {'relations': self.relations},
            {'tables': [table]}
        ]
        ExtractionModel.load(self.schema_sl, [{'subject': subject}])

        # Test zero keys
        with pytest.raises(InvalidConfigError) as e:
            subject.append({})
            ExtractionModel.load(self.schema_sl, [{'subject': subject}])
        assert 'Expected one key, got' in str(e)

        # Test two keys
        with pytest.raises(InvalidConfigError) as e:
            subject[2] = dict(subject[0])
            subject[2].update(subject[1])
            ExtractionModel.load(self.schema_sl, [{'subject': subject}])
        assert 'Expected one key, got' in str(e)
    def test_subject_must_have_at_least_one_table(self):
        data = [{'subject': [{'relations': self.relations}]}]
        with pytest.raises(InvalidConfigError) as e:
            ExtractionModel.load(self.schema_sl, data)
        assert 'A subject must have at least one table' in str(e)
Example #33
def process_toplevel_example(toplevel_example, testing=True):
    database = SqliteDatabase(path=':memory:')
    conn = database.connection

    schema_filename = toplevel_example['schema']
    svg_filename = schema_filename.replace('.sql', '')
    svg_path = file_path(os.path.join('_static', svg_filename))
    schema_lines = read_file(schema_filename)
    conn.executescript(schema_lines)
    schema = SqliteSchema.create_from_conn(conn)
    make_graph(schema, svg_path)

    examples = []
    for example in toplevel_example['examples']:
        (title, ref, description, config, expected_statements,
         short_description) = (
            example['title'], example.get('ref'), example['description'],
            example['config'], example['expected_statements'],
            example['short_description'])

        extraction_model = ExtractionModel.load(schema, config)

        with stdout_redirect(StringIO()) as new_stdout:
            extractor = Extractor(database, extraction_model,
                                  explain=True).launch()
        new_stdout.flush()
        new_stdout.seek(0)
        output = new_stdout.read()
        output = output.rstrip().split('\n')

        generator = Generator(schema, extractor)
        generator.generate_statements()

        statements = []
        for insert_statement in generator.insert_statements:
            (stmt, values) = list(database.make_insert_statement(
                insert_statement))
            statements.append(complete_statement(stmt, values))

        for update_statement in generator.update_statements:
            (stmt, values) = list(database.make_update_statement(
                update_statement))
            statements.append(complete_statement(stmt, values))

        if expected_statements != statements:
            print('There is a mismatch in expected statements.')
            print('Schema:%s\n' % schema)
            print('Config:')
            pprint(config)
            print('Statements:')
            for stmt in statements:
                print(stmt)
            print('Expected statements:')
            for stmt in expected_statements:
                print(stmt)

            if testing:
                raise Exception('Mismatch in statements. See stdout output.')
            exit(1)

        examples.append({
            'title': title,
            'ref': ref,
            'description': description,
            'sdesc': short_description,
            'config': yaml.dump(config).split("\n"),
            'output': output,
            'statements': statements,
        })

    doc_filename = toplevel_example['doc_filename']

    max_ref_len = max([len(e['ref']) for e in examples])
    max_sdesc_len = max([len(e['sdesc']) for e in examples])

    data = {
        'title': toplevel_example['title'],
        'description': toplevel_example.get('description'),
        'schema_svg': os.path.join('_static', svg_filename),
        'schema': schema_lines.split('\n'),
        'examples': examples,
        'max_ref_len': max_ref_len,
        'max_sdesc_len': max_sdesc_len,
    }

    if testing:
        return

    # Write main rst file
    template = Template(open(file_path('examples-example.rst.j2')).read())
    with open(file_path('%s.rst' % doc_filename), 'wt') as f:
        f.write(template.render(**data))

    # Write table rst file
    if toplevel_example.get('make_table', True):
        template = Template(open(file_path('examples-table.rst.j2')).read())
        with open(file_path('%s_table.rst' % doc_filename), 'wt') as f:
            f.write(template.render(**data))

    return doc_filename
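
# A minimal sketch of the input structure process_toplevel_example() expects,
# inferred from the keys the function reads above; the schema file name, the
# config and the expected SQL below are illustrative assumptions, not values
# from the project's example files.
example_toplevel = {
    'title': 'Example group title',
    'description': 'Optional longer description',
    'schema': 'schema.sql',                # run against an in-memory SQLite db
    'doc_filename': 'examples_something',  # base name of the generated .rst files
    'examples': [{
        'title': 'A single example',
        'ref': 'ref1',
        'short_description': 'Short description',
        'description': 'Longer description',
        'config': [{'subject': [{'tables': [{'table': 'test1'}]}]}],
        'expected_statements': ["INSERT INTO test1 (id) VALUES(1);"],
    }],
}
# process_toplevel_example(example_toplevel, testing=True)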
Example #34
def main(args):
    parser = argparse.ArgumentParser(
        description='Minimize a database',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent(EPILOG))

    parser.add_argument(dest='config_path', metavar='CONFIG_PATH',
                        help="path to extraction config file")
    parser.add_argument(dest='src_url', metavar='SRC_URL',
                        help="source database url")
    parser.add_argument('-u', '--url', dest='dst_url', metavar='URL',
                        help="destination database url")
    parser.add_argument('-f', '--file', dest='dst_file', metavar='FILE',
                        help="destination database file. Use - for stdout")
    parser.add_argument('-e', '--explain', dest='explain', action='store_true',
                        default=False,
                        help='explain where rows are coming from')
    parser.add_argument('-q', '--quiet', dest='quiet', action='store_true',
                        default=False,
                        help="don't output anything")
    parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                        default=False,
                        help="verbose output")

    # Ignore SIGPIPE and don't throw exceptions on it
    signal(SIGPIPE, SIG_DFL)

    args = parser.parse_args(args)

    verbosity = 1
    if args.quiet:
        verbosity = 0
    if args.verbose:
        verbosity = 2

    if args.explain:
        if args.dst_url is not None:
            print('-u is meaningless when using -e')
            exit(1)
        if args.dst_file is not None:
            print('-f is meaningless when using -e')
            exit(1)
    else:
        if (args.dst_url is None) == (args.dst_file is None):
            print('Either -u or -f must be passed')
            exit(1)

    src_database = abridger.database.load(args.src_url, verbose=verbosity > 0)

    if not args.explain:
        if args.dst_url is not None:
            outputter = DbOutputter(args.dst_url, verbosity)
            if not isinstance(src_database, type(outputter.database)):
                print('src and dst databases must be of the same type')
                exit(1)
        else:
            outputter = SqlOutputter(src_database, args.dst_file, verbosity)

    if verbosity > 0:
        print('Querying...')
    extraction_model_data = abridger.config_file_loader.load(args.config_path)
    extraction_model = ExtractionModel.load(src_database.schema,
                                            extraction_model_data)
    extractor = Extractor(src_database, extraction_model, explain=args.explain,
                          verbosity=verbosity)
    extractor.launch()

    if args.explain:
        exit(0)

    generator = Generator(src_database.schema, extractor)
    generator.generate_statements()

    if args.dst_url is not None:
        # The src database isn't needed any more
        src_database.disconnect()

    total_table_insert_counts = defaultdict(int)
    total_table_update_counts = defaultdict(int)
    table_insert_counts = defaultdict(int)
    table_update_counts = defaultdict(int)
    total_insert_count = len(generator.insert_statements)
    total_update_count = len(generator.update_statements)
    total_count = total_insert_count + total_update_count

    start_time = time()

    try:
        for insert_statement in generator.insert_statements:
            (table, values) = insert_statement
            total_table_insert_counts[table] += 1

        for update_statement in generator.update_statements:
            table = update_statement[0]
            total_table_update_counts[table] += 1

        if verbosity > 0:
            insert_tables = set(total_table_insert_counts.keys())
            update_tables = set(total_table_update_counts.keys())
            tables = insert_tables | update_tables

            if args.dst_url is not None:
                print(
                    'Performing %d inserts and %d updates to %d tables...' % (
                        total_insert_count, total_update_count, len(tables)))
            else:
                print(
                    'Writing SQL for %d inserts and %d updates '
                    'in %d tables...' % (
                        total_insert_count, total_update_count, len(tables)))

        insert_count = 0
        count = 0
        outputter.begin()
        for insert_statement in generator.insert_statements:
            (table, values) = insert_statement
            table_insert_counts[table] += 1
            insert_count += 1
            count += 1
            if verbosity > 1:
                percentage = math.floor(1000 * (count / total_count)) / 10
                print("%5.1f%% Inserting (%6d/%6d) row (%6d/%6d) in %s" % (
                    percentage,
                    insert_count, total_insert_count,
                    table_insert_counts[table],
                    total_table_insert_counts[table],
                    table))
            outputter.insert_row(insert_statement)

        update_count = 0
        for update_statement in generator.update_statements:
            table = update_statement[0]
            table_update_counts[table] += 1
            update_count += 1
            count += 1
            if verbosity > 1:
                percentage = math.floor(1000 * (count / total_count)) / 10
                print("%5.1f%% Updating  (%6d/%6d) row (%6d/%6d) in %s" % (
                    percentage,
                    update_count, total_update_count,
                    table_update_counts[table],
                    total_table_update_counts[table],
                    table))
            outputter.update_row(update_statement)

        outputter.commit()
    finally:
        # Try to rollback in case something went wrong; ignore any errors
        try:
            outputter.rollback()
        except:   # pragma: no cover
            pass  # pragma: no cover

        src_database.disconnect()

    if verbosity > 0:
        if args.dst_url is not None:
            elapsed_time = time() - start_time
            print('Data loading completed in %0.1f seconds' % elapsed_time)
        else:
            print('Done')
Example #35
 def test_illegal_disabled_without_a_column(self, schema1, data1):
     relation = {'table': 'test2', 'disabled': True}
     data = [{'relations': [relation]}]
     with pytest.raises(RelationIntegrityError):
         ExtractionModel.load(schema1, data)
 def relation0(self):
     relation = dict(self.relations[0])
     data = [{'relations': [relation]}]
     model = ExtractionModel.load(self.schema_sl, data)
     return model.relations[0]
Example #37
    def test_illegal_disabled_without_a_column(self, schema1, data1):
        relation = {'table': 'test2', 'disabled': True}
        data = [{'relations': [relation]}]
        with pytest.raises(RelationIntegrityError):
            ExtractionModel.load(schema1, data)
    def test_not_null_cols(self):
        relation = dict(self.relations[0])
        table = relation['table']
        col = relation['column']

        data = [{'not-null-columns': []}]
        ExtractionModel.load(self.schema_sl, data)

        data = [{'not-null-columns': []}]
        ExtractionModel.load(self.schema_sl, data)

        # It must have table and column keys
        with pytest.raises(ValidationError) as e:
            data = [{'not-null-columns': [{}]}]
            ExtractionModel.load(self.schema_sl, data)
        assert "is a required property" in str(e)

        with pytest.raises(ValidationError) as e:
            data = [{'not-null-columns': [{'table': table}]}]
            ExtractionModel.load(self.schema_sl, data)
        assert "'column' is a required property" in str(e)

        with pytest.raises(ValidationError) as e:
            data = [{'not-null-columns': [{'column': col}]}]
            ExtractionModel.load(self.schema_sl, data)
        assert "'table' is a required property" in str(e)

        data = [
            {'not-null-columns': [{'table': table, 'column': col}]}]
        model = ExtractionModel.load(self.schema_sl, data)
        assert len(model.not_null_cols) == 1
        afc = model.not_null_cols[0]
        assert afc.table.name == table
        assert afc.col.name == col
        assert afc.foreign_key.src_cols[0].table.name == table
        assert col in [c.name for c in afc.foreign_key.src_cols]
Example #39
def main(args):
    parser = argparse.ArgumentParser(
        description='Minimize a database',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent(EPILOG))

    parser.add_argument(dest='config_path',
                        metavar='CONFIG_PATH',
                        help="path to extraction config file")
    parser.add_argument(dest='src_url',
                        metavar='SRC_URL',
                        help="source database url")
    parser.add_argument('-u',
                        '--url',
                        dest='dst_url',
                        metavar='URL',
                        help="destination database url")
    parser.add_argument('-f',
                        '--file',
                        dest='dst_file',
                        metavar='FILE',
                        help="destination database file. Use - for stdout")
    parser.add_argument('-e',
                        '--explain',
                        dest='explain',
                        action='store_true',
                        default=False,
                        help='explain where rows are coming from')
    parser.add_argument('-q',
                        '--quiet',
                        dest='quiet',
                        action='store_true',
                        default=False,
                        help="don't output anything")
    parser.add_argument('-v',
                        '--verbose',
                        dest='verbose',
                        action='store_true',
                        default=False,
                        help="verbose output")

    # Ignore SIGPIPE and don't throw exceptions on it
    signal(SIGPIPE, SIG_DFL)

    args = parser.parse_args(args)

    verbosity = 1
    if args.quiet:
        verbosity = 0
    if args.verbose:
        verbosity = 2

    if args.explain:
        if args.dst_url is not None:
            print('-u is meaningless when using -e')
            exit(1)
        if args.dst_file is not None:
            print('-f is meaningless when using -e')
            exit(1)
    else:
        if (args.dst_url is None) == (args.dst_file is None):
            print('Either -u or -f must be passed')
            exit(1)

    src_database = abridger.database.load(args.src_url, verbose=verbosity > 0)

    if not args.explain:
        if args.dst_url is not None:
            outputter = DbOutputter(args.dst_url, verbosity)
            if not isinstance(src_database, type(outputter.database)):
                print('src and dst databases must be of the same type')
                exit(1)
        else:
            outputter = SqlOutputter(src_database, args.dst_file, verbosity)

    if verbosity > 0:
        print('Querying...')
    extraction_model_data = abridger.config_file_loader.load(args.config_path)
    extraction_model = ExtractionModel.load(src_database.schema,
                                            extraction_model_data)
    extractor = Extractor(src_database,
                          extraction_model,
                          explain=args.explain,
                          verbosity=verbosity)
    extractor.launch()

    if args.explain:
        exit(0)

    generator = Generator(src_database.schema, extractor)
    generator.generate_statements()

    if args.dst_url is not None:
        # The src database isn't needed any more
        src_database.disconnect()

    total_table_insert_counts = defaultdict(int)
    total_table_update_counts = defaultdict(int)
    table_insert_counts = defaultdict(int)
    table_update_counts = defaultdict(int)
    total_insert_count = len(generator.insert_statements)
    total_update_count = len(generator.update_statements)
    total_count = total_insert_count + total_update_count

    start_time = time()

    try:
        for insert_statement in generator.insert_statements:
            (table, values) = insert_statement
            total_table_insert_counts[table] += 1

        for update_statement in generator.update_statements:
            table = update_statement[0]
            total_table_update_counts[table] += 1

        if verbosity > 0:
            insert_tables = set(total_table_insert_counts.keys())
            update_tables = set(total_table_update_counts.keys())
            tables = insert_tables | update_tables

            if args.dst_url is not None:
                print('Performing %d inserts and %d updates to %d tables...' %
                      (total_insert_count, total_update_count, len(tables)))
            else:
                print('Writing SQL for %d inserts and %d updates '
                      'in %d tables...' %
                      (total_insert_count, total_update_count, len(tables)))

        insert_count = 0
        count = 0
        outputter.begin()
        for insert_statement in generator.insert_statements:
            (table, values) = insert_statement
            table_insert_counts[table] += 1
            insert_count += 1
            count += 1
            if verbosity > 1:
                percentage = math.floor(1000 * (count / total_count)) / 10
                print("%5.1f%% Inserting (%6d/%6d) row (%6d/%6d) in %s" %
                      (percentage, insert_count, total_insert_count,
                       table_insert_counts[table],
                       total_table_insert_counts[table], table))
            outputter.insert_row(insert_statement)

        update_count = 0
        for update_statement in generator.update_statements:
            table = update_statement[0]
            table_update_counts[table] += 1
            update_count += 1
            count += 1
            if verbosity > 1:
                percentage = math.floor(1000 * (count / total_count)) / 10
                print("%5.1f%% Updating  (%6d/%6d) row (%6d/%6d) in %s" %
                      (percentage, update_count, total_update_count,
                       table_update_counts[table],
                       total_table_update_counts[table], table))
            outputter.update_row(update_statement)

        outputter.commit()
    finally:
        # Try to rollback in case something went wrong; ignore any errors
        try:
            outputter.rollback()
        except:  # pragma: no cover
            pass  # pragma: no cover

        src_database.disconnect()

    if verbosity > 0:
        if args.dst_url is not None:
            elapsed_time = time() - start_time
            print('Data loading completed in %0.1f seconds' % elapsed_time)
        else:
            print('Done')
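
# A minimal sketch of how main() might be wired up as a script entry point,
# based only on the argparse interface defined above; this wiring and the
# example arguments in the comment are assumptions, not part of the original
# listing.
if __name__ == '__main__':  # pragma: no cover
    import sys
    # e.g.: python minimize_db.py config.yaml sqlite:///src.db -f dump.sql
    main(sys.argv[1:])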