예제 #1
0
    def test(self):
        """Check that ``utils.viz_schema`` writes a diagram for each fixture schema.

        Both schemas are rendered to the same path, so any diagram left over
        from a previous render is deleted first. Otherwise the existence check
        for the second schema would pass even if nothing had been written.
        """
        filename = os.path.join(self.dirname, 'test.svg')
        schema_filenames = (
            'tests/fixtures/declarative_schema/schema.csv',
            'tests/fixtures/declarative_schema/viz_schema.csv',
        )
        for schema_filename in schema_filenames:
            # start from a clean slate so the assertion reflects this render only
            if os.path.isfile(filename):
                os.remove(filename)

            schema, _, _ = utils.init_schema(schema_filename)
            utils.viz_schema(schema, filename)
            self.assertTrue(os.path.isfile(filename))
예제 #2
0
    def test_normalize(self):
        """Exercise the ``normalize`` command of the command line program.

        A Python schema module is generated from the CSV schema, a small
        Parent/Child dataset is written to a workbook, the workbook is
        normalized through the CLI, and the result is read back and compared
        with the original objects. Finally, an unknown model name is expected
        to make the program exit.
        """
        schema_csv = os.path.join('tests', 'fixtures', 'declarative_schema', 'schema.csv')
        schema_py = os.path.join(self.tempdir, 'schema.py')
        with __main__.App(argv=['init-schema', schema_csv, schema_py]) as app:
            app.run()

        # the CSV schema and the generated module must define the same models
        csv_schema, _, csv_models = utils.init_schema(schema_csv)
        py_schema = utils.get_schema(schema_py)
        py_models = list(utils.get_models(py_schema).values())
        csv_model_names = {model.__name__ for model in csv_models}
        py_model_names = {model.__name__ for model in py_models}
        self.assertEqual(csv_model_names, py_model_names)

        # write the input workbook
        source_xlsx = os.path.join(self.tempdir, 'file1.xlsx')
        parent = csv_schema.Parent(id='p_0')
        for child_id in ('c_0', 'c_1'):
            parent.children.create(id=child_id)
        io.WorkbookWriter().run(source_xlsx, [parent], models=csv_models)

        # normalize it through the CLI
        normalized_xlsx = os.path.join(self.tempdir, 'file2.xlsx')
        normalize_argv = ['normalize', schema_csv, 'Parent', source_xlsx, normalized_xlsx]
        with __main__.App(argv=normalize_argv) as app:
            app.run()

        objs = io.WorkbookReader().run(normalized_xlsx,
                                       models=csv_models,
                                       ignore_missing_attributes=True)
        normalized_parent = objs[csv_schema.Parent][0]
        self.assertTrue(normalized_parent.is_equal(parent))

        # a model name that the schema does not define
        bad_argv = ['normalize', schema_csv, 'Parent2', source_xlsx, normalized_xlsx]
        with self.assertRaises(SystemExit):
            with __main__.App(argv=bad_argv) as app:
                app.run()
예제 #3
0
    def test_gen_template(self):
        """Exercise the ``gen-template`` command of the command line program.

        A template is generated from the CSV schema and read back, expecting
        an empty list of objects for each of ``Parent``, ``Child`` and
        ``Quantity``. A second template is generated from the Python schema
        module into a ``file-*.xlsx`` pattern and read back ungrouped,
        expecting ``None``.
        """
        schema_csv = os.path.join('tests', 'fixtures', 'declarative_schema', 'schema.csv')
        template_xlsx = os.path.join(self.tempdir, 'file.xlsx')
        with __main__.App(argv=['gen-template', schema_csv, template_xlsx]) as app:
            app.run()
        csv_schema, _, csv_models = utils.init_schema(schema_csv)

        # generate the Python module and check that it defines the same models
        schema_py = os.path.join(self.tempdir, 'schema.py')
        with __main__.App(argv=['init-schema', schema_csv, schema_py]) as app:
            app.run()
        py_schema = utils.get_schema(schema_py)
        py_models = list(utils.get_models(py_schema).values())
        csv_model_names = {model.__name__ for model in csv_models}
        py_model_names = {model.__name__ for model in py_models}
        self.assertEqual(csv_model_names, py_model_names)

        # the template generated from the CSV schema contains no objects
        expected_objs = {
            csv_schema.Parent: [],
            csv_schema.Child: [],
            csv_schema.Quantity: [],
        }
        objs = io.WorkbookReader().run(template_xlsx, models=csv_models)
        self.assertEqual(objs, expected_objs)

        # template generated from the Python module
        template_pattern = os.path.join(self.tempdir, 'file-*.xlsx')
        with __main__.App(argv=['gen-template', schema_py, template_pattern]) as app:
            app.run()
        objs = io.WorkbookReader().run(template_pattern,
                                       models=py_models,
                                       group_objects_by_model=False)
        self.assertEqual(objs, None)
예제 #4
0
    def test_get_model(self):
        """Check that ``web_service.get_model`` rejects an unknown model name.

        The fixture schema is loaded first, and its models are checked against
        ``utils.get_models``. Looking up ``'Parent2'`` is then expected to
        raise ``werkzeug.exceptions.BadRequest``.
        """
        fixture_path = os.path.join(
            'tests', 'fixtures', 'declarative_schema', 'schema.csv')
        schema, _, models = utils.init_schema(fixture_path)

        models_from_schema = utils.get_models(schema).values()
        self.assertEqual(set(models), set(models_from_schema))

        self.assertRaises(werkzeug.exceptions.BadRequest,
                          web_service.get_model, models, 'Parent2')
예제 #5
0
    def test_get_models(self):
        """Check ``utils.get_models`` on an in-memory schema and on its generated module.

        The model names are expected to be ``Child``, ``Parent`` and
        ``Quantity`` both for the schema returned by ``utils.init_schema`` and
        for the module that ``init_schema`` wrote to ``out_filename``.
        """
        expected_names = ['Child', 'Parent', 'Quantity']
        module_path = os.path.join(self.tmp_dirname, 'schema.py')

        schema, _, models = utils.init_schema(
            'tests/fixtures/declarative_schema/schema.csv',
            out_filename=module_path)
        self.assertEqual(set(models), set(utils.get_models(schema).values()))
        names = sorted(utils.get_models(schema).keys())
        self.assertEqual(names, expected_names)

        # same check against the module written to disk
        schema = utils.get_schema(module_path)
        names = sorted(utils.get_models(schema).keys())
        self.assertEqual(names, expected_names)
예제 #6
0
    def test_extra_attributes(self):
        """Check how ``WorkbookReader`` handles a cell that matches no attribute.

        An ``'!Extra'`` cell is added to row index 2 of the ``!!Child`` sheet
        (presumably the column-heading row -- confirm against the writer).
        Reading succeeds with ``ignore_extra_attributes=True`` and raises
        ``ValueError`` with ``ignore_extra_attributes=False``.
        """
        schema, _, _ = utils.init_schema(
            'tests/fixtures/declarative_schema/schema.csv',
            out_filename=os.path.join(self.tmp_dirname, 'schema.py'))

        parent = schema.Parent(id='p_0')
        for child_id in ('c_0', 'c_1', 'c_2'):
            parent.children.create(id=child_id)

        data_filename = os.path.join(self.tmp_dirname, 'data.xlsx')
        obj_tables.io.WorkbookWriter().run(
            data_filename, [parent], models=[schema.Parent, schema.Child])

        def read_parent(ignore_extra_attributes):
            # read the workbook back and return the first Parent object
            objs = obj_tables.io.WorkbookReader().run(
                data_filename,
                models=[schema.Parent, schema.Child],
                ignore_extra_attributes=ignore_extra_attributes)
            return objs[schema.Parent][0]

        # append the extra cell
        workbook = wc_utils.workbook.io.read(data_filename)
        workbook['!!Child'][2].append('!Extra')
        wc_utils.workbook.io.write(data_filename, workbook)

        self.assertTrue(read_parent(True).is_equal(parent))
        with self.assertRaisesRegex(ValueError,
                                    'does not match any attribute'):
            read_parent(False)

        # overwrite the last cell of the same row and repeat both reads
        workbook = wc_utils.workbook.io.read(data_filename)
        workbook['!!Child'][2][-1] = '!Extra'
        wc_utils.workbook.io.write(data_filename, workbook)

        read_parent(True)
        with self.assertRaisesRegex(ValueError,
                                    'does not match any attribute'):
            read_parent(False)
예제 #7
0
    def test_comments(self):
        """Check that comments survive a write/read round trip through a workbook.

        Comments are attached to objects via ``_comments`` before writing, and
        additional ``%/ ... /%`` cells are injected directly into the written
        workbook. The assertions at the end expect the injected text back
        without the ``%/ ... /%`` delimiters, merged with the comments that
        were set on the objects.
        """
        out_filename = os.path.join(self.tmp_dirname, 'schema.py')
        schema, _, _ = utils.init_schema(
            'tests/fixtures/declarative_schema/schema.csv',
            out_filename=out_filename)

        # build a parent with three children; c_2 is given no comments here
        p_0 = schema.Parent(id='p_0')
        p_0._comments = ['X', 'Y']
        c_0 = p_0.children.create(id='c_0')
        c_1 = p_0.children.create(id='c_1')
        c_2 = p_0.children.create(id='c_2')
        c_0._comments = ['A', 'B']
        c_1._comments = ['C', 'D']

        filename = os.path.join(self.tmp_dirname, 'data.xlsx')
        obj_tables.io.WorkbookWriter().run(
            filename, [p_0], models=[schema.Parent, schema.Child])

        # inject extra comment rows/cells (and empty rows) into the written
        # sheets; each insert shifts the rows after it, so the order of these
        # statements matters
        wb = wc_utils.workbook.io.read(filename)
        wb['!!Parent'].insert(0, wc_utils.workbook.Row(['%/ V /%']))
        wb['!!Parent'].insert(0, wc_utils.workbook.Row([]))
        wb['!!Parent'].insert(3, wc_utils.workbook.Row([]))
        wb['!!Parent'].insert(3, wc_utils.workbook.Row(['%/ W /%']))
        wb['!!Parent'][5].append('%/ Z /%')
        wb['!!Child'].insert(0, wc_utils.workbook.Row(['%/ 123 /%']))
        wb['!!Child'].append(wc_utils.workbook.Row(['%/ 456 /%']))
        wc_utils.workbook.io.write(filename, wb)

        p_0_b = obj_tables.io.WorkbookReader().run(
            filename, models=[schema.Parent, schema.Child])[schema.Parent][0]

        # the objects themselves are unchanged by the injected comments
        self.assertTrue(p_0_b.is_equal(p_0))
        # 'V' and 'W' come before the parent's own comments and 'Z' after them
        self.assertEqual(p_0_b._comments, ['V', 'W'] + p_0._comments + ['Z'])
        # '123' was inserted at the top of the Child sheet and is expected on c_0
        self.assertEqual(
            p_0_b.children.get_one(id='c_0')._comments,
            ['123'] + c_0._comments)
        self.assertEqual(
            p_0_b.children.get_one(id='c_1')._comments, c_1._comments)
        # '456' was appended to the end of the Child sheet and is expected on c_2
        self.assertEqual(
            p_0_b.children.get_one(id='c_2')._comments,
            c_2._comments + ['456'])
예제 #8
0
    def test_init_schema(self):
        """Round-trip objects through a workbook using a schema built from CSV.

        The round trip is done twice: once with the schema returned by
        ``utils.init_schema``, also checking the children's ``_comments``,
        and once with the generated Python module loaded via
        ``utils.get_schema``.
        """
        module_path = os.path.join(self.tmp_dirname, 'schema.py')
        data_path = os.path.join(self.tmp_dirname, 'data.xlsx')

        def round_trip(schema_module, parent):
            # write ``parent`` to the workbook and return the Parent read back
            obj_tables.io.WorkbookWriter().run(
                data_path, [parent],
                models=[schema_module.Parent, schema_module.Child])
            objs = obj_tables.io.WorkbookReader().run(
                data_path,
                models=[schema_module.Parent, schema_module.Child])
            return objs[schema_module.Parent][0]

        schema, _, _ = utils.init_schema(
            'tests/fixtures/declarative_schema/schema.csv',
            out_filename=module_path)

        parent = schema.Parent(id='p_0')
        children = {
            child_id: parent.children.create(id=child_id)
            for child_id in ('c_0', 'c_1', 'c_2')
        }
        children['c_0']._comments = ['A', 'B']
        children['c_1']._comments = ['C', 'D']

        parent_copy = round_trip(schema, parent)
        self.assertTrue(parent_copy.is_equal(parent))
        for child_id in ('c_0', 'c_1', 'c_2'):
            self.assertEqual(
                parent_copy.children.get_one(id=child_id)._comments,
                children[child_id]._comments)

        # import module and test
        schema = utils.get_schema(module_path)

        parent = schema.Parent(id='p_0')
        for child_id in ('c_0', 'c_1'):
            parent.children.create(id=child_id)

        parent_copy = round_trip(schema, parent)
        self.assertTrue(parent_copy.is_equal(parent))
예제 #9
0
    def test_extra_sheets(self):
        """Check how ``WorkbookReader`` treats sheets that match no model.

        A sheet named ``Extra`` and a schema sheet are added to a written
        workbook, and reading is still expected to succeed. After the extra
        sheet is renamed to ``!!Extra``, reading is expected to raise
        ``ValueError`` ('No matching models').
        """
        schema, _, _ = utils.init_schema(
            'tests/fixtures/declarative_schema/schema.csv',
            out_filename=os.path.join(self.tmp_dirname, 'schema.py'))

        parent = schema.Parent(id='p_0')
        for child_id in ('c_0', 'c_1', 'c_2'):
            parent.children.create(id=child_id)

        data_path = os.path.join(self.tmp_dirname, 'data.xlsx')
        obj_tables.io.WorkbookWriter().run(
            data_path, [parent], models=[schema.Parent, schema.Child])

        # add a sheet without the '!!' prefix plus a schema sheet
        extra_heading = "!!ObjTables type='Data' class='Extra' objTablesVersion='{}'".format(
            obj_tables.__version__)
        schema_sheet_name = '!!' + core.SCHEMA_SHEET_NAME
        schema_heading = "!!ObjTables type='{}' objTablesVersion='{}'".format(
            core.SCHEMA_TABLE_TYPE, obj_tables.__version__)

        workbook = wc_utils.workbook.io.read(data_path)
        workbook['Extra'] = wc_utils.workbook.Worksheet()
        workbook['Extra'].append(wc_utils.workbook.Row([extra_heading]))
        workbook[schema_sheet_name] = wc_utils.workbook.Worksheet()
        workbook[schema_sheet_name].append(
            wc_utils.workbook.Row([schema_heading]))
        wc_utils.workbook.io.write(data_path, workbook)

        objs = obj_tables.io.WorkbookReader().run(
            data_path, models=[schema.Parent, schema.Child])
        self.assertTrue(objs[schema.Parent][0].is_equal(parent))

        # with the '!!' prefix the extra sheet must match a model
        workbook = wc_utils.workbook.io.read(data_path)
        workbook['!!Extra'] = workbook.pop('Extra')
        wc_utils.workbook.io.write(data_path, workbook)
        with self.assertRaisesRegex(ValueError, 'No matching models'):
            obj_tables.io.WorkbookReader().run(
                data_path, models=[schema.Parent, schema.Child])
예제 #10
0
    def test_init_schema_from_csv_workbook(self):
        """Initialize a schema from a multi-file CSV workbook and round-trip data.

        The fixture schema is re-written as a ``schema-*.csv`` workbook whose
        sheet is named ``'!!' + core.SCHEMA_SHEET_NAME``. Objects are then
        round-tripped through a workbook twice: with the schema returned by
        ``utils.init_schema`` and with the generated Python module.
        """
        module_path = os.path.join(self.tmp_dirname, 'schema.py')
        data_path = os.path.join(self.tmp_dirname, 'data.xlsx')
        schema_pattern = os.path.join(self.tmp_dirname, 'schema-*.csv')

        # move the schema into a sheet with the expected name
        workbook = wc_utils.workbook.io.read(
            'tests/fixtures/declarative_schema/schema*.csv')
        workbook['!!' + core.SCHEMA_SHEET_NAME] = workbook.pop('')
        wc_utils.workbook.io.write(schema_pattern, workbook)

        def check_round_trip(schema_module):
            # write a parent with two children and compare what is read back
            parent = schema_module.Parent(id='p_0')
            for child_id in ('c_0', 'c_1'):
                parent.children.create(id=child_id)

            obj_tables.io.WorkbookWriter().run(
                data_path, [parent],
                models=[schema_module.Parent, schema_module.Child])
            objs = obj_tables.io.WorkbookReader().run(
                data_path,
                models=[schema_module.Parent, schema_module.Child])
            self.assertTrue(objs[schema_module.Parent][0].is_equal(parent))

        schema, _, _ = utils.init_schema(schema_pattern,
                                         out_filename=module_path)
        check_round_trip(schema)

        # import module and test
        check_round_trip(utils.get_schema(module_path))
예제 #11
0
    def test_gen_template(self):
        """Exercise the ``/api/gen-template/`` endpoint of the web service.

        A valid schema is expected to produce a 200 response whose body is an
        XLSX workbook; reading that workbook ungrouped is expected to give
        ``None``. An invalid schema is expected to produce a 400 response.
        """
        fixture_dir = os.path.join('tests', 'fixtures', 'declarative_schema')

        valid_schema_path = os.path.join(fixture_dir, 'schema.csv')
        schema, _, models = utils.init_schema(valid_schema_path)
        self.assertEqual(set(models), set(utils.get_models(schema).values()))

        client = web_service.app.test_client()

        def request_template(schema_path):
            # upload the schema and ask for an XLSX template
            with open(schema_path, 'rb') as schema_file:
                form = {
                    'schema': (schema_file, os.path.basename(schema_path)),
                    'format': 'xlsx',
                }
                return client.post('/api/gen-template/', data=form)

        response = request_template(valid_schema_path)
        self.assertEqual(response.status_code, 200)

        template_path = os.path.join(self.tempdir, 'file.xlsx')
        with open(template_path, 'wb') as template_file:
            template_file.write(response.data)

        objs = io.WorkbookReader().run(template_path,
                                       models=models,
                                       group_objects_by_model=False)
        self.assertEqual(objs, None)

        # invalid schema
        response = request_template(
            os.path.join(fixture_dir, 'invalid-schema.csv'))
        self.assertEqual(response.status_code, 400)
예제 #12
0
    def do_sbtab_sbml_examples(self, action):
        """Run the command line program over the SBtab examples.

        The Python schema module, the schema diagram and the template workbook
        are always regenerated. Each example data file is then processed
        according to ``action``.

        Args:
            action: ``'validate'`` validates each file against the schema and,
                for non-XLSX files, also validates the ``.csv`` counterpart
                and compares the objects read from both. ``'convert'``
                converts each non-XLSX file to XLSX, multi-file TSV,
                multi-file CSV, JSON and YAML. Any other value skips the
                per-file steps.
        """
        dirname = os.path.join('examples', 'sbtab')
        schema_filename = os.path.join(dirname, 'SBtab.tsv')

        def run_cli(*argv):
            # run one command of the command line program
            with __main__.App(argv=list(argv)) as app:
                app.run()

        # Initialize Python module which implements schema
        run_cli('init-schema', schema_filename,
                os.path.join(dirname, 'SBtab.py'))

        # Visualize schema
        run_cli('viz-schema', schema_filename,
                os.path.join(dirname, 'SBtab.svg'))

        # Generate a template for the schema
        run_cli('gen-template', schema_filename,
                os.path.join(dirname, 'template.xlsx'),
                '--write-schema', '--write-toc')

        # Validate that documents adhere to the schema
        data_filenames = [
            'template.xlsx',
            'hynne.multi.tsv',
            'feed_forward_loop_relationship.multi.tsv',
            'kegg_reactions_cc_ph70_quantity.multi.tsv',
            'yeast_transcription_network_chang_2008_relationship.multi.tsv',
        ]
        data_filenames += [
            'simple_examples/{}.multi.tsv'.format(number)
            for number in range(1, 11)
        ]
        data_filenames += [
            'teusink_data.multi.tsv',
            'teusink_model.multi.tsv',
            'jiang_data.multi.tsv',
            'jiang_model.multi.tsv',
            'ecoli_noor_2016_data.multi.tsv',
            'ecoli_noor_2016_model.multi.tsv',
            'ecoli_wortel_2018_data.multi.tsv',
            'ecoli_wortel_2018_model.multi.tsv',
            'sigurdsson_model.multi.tsv',
            'layout_model.multi.tsv',
        ]

        schema, _, models = utils.init_schema(schema_filename)
        self.assertEqual(set(models), set(utils.get_models(schema).values()))

        # suffixes which replace '.multi.tsv' when converting; the patterns
        # with '*' write one file per table into a directory
        convert_suffixes = ('.xlsx', '.tsv/*.tsv', '.csv/*.csv', '.json', '.yml')

        for data_filename in data_filenames:
            full_data_filename = os.path.join(dirname, data_filename)
            is_xlsx = data_filename.endswith('.xlsx')

            if action == 'validate':
                run_cli('validate', schema_filename, full_data_filename)

                if not is_xlsx:
                    # the CSV counterparts must validate and hold equal objects
                    full_csv_filename = full_data_filename.replace(
                        '.multi.tsv', '.multi.csv')
                    run_cli('validate',
                            schema_filename.replace('.tsv', '.csv'),
                            full_csv_filename)

                    objs_tsv = io.Reader().run(full_data_filename,
                                               models=models,
                                               **DEFAULT_READER_ARGS)
                    objs_csv = io.Reader().run(full_csv_filename,
                                               models=models,
                                               **DEFAULT_READER_ARGS)
                    for cls, cls_objs_tsv in objs_tsv.items():
                        for obj_tsv, obj_csv in zip(cls_objs_tsv,
                                                    objs_csv[cls]):
                            self.assertTrue(obj_tsv.is_equal(obj_csv))

            if action == 'convert' and not is_xlsx:
                for suffix in convert_suffixes:
                    full_convert_filename = os.path.join(
                        dirname, data_filename.replace('.multi.tsv', suffix))
                    if '*' in suffix:
                        convert_dirname = os.path.dirname(
                            full_convert_filename)
                        if not os.path.isdir(convert_dirname):
                            os.mkdir(convert_dirname)
                    run_cli('convert', schema_filename, full_data_filename,
                            full_convert_filename)

        if action == 'validate':
            # validate the template against the XLSX version of the schema
            run_cli('validate',
                    schema_filename.replace('.tsv', '.xlsx'),
                    os.path.join(dirname, data_filenames[0]))
예제 #13
0
    def test_init_schema_from_xlsx(self):
        """Initialize a schema from an XLSX workbook and check error handling.

        The fixture schema is re-written as an XLSX workbook whose sheet is
        named ``'!!' + core.SCHEMA_SHEET_NAME``. Objects are round-tripped
        with the schema returned by ``utils.init_schema`` and with the
        generated Python module. Three corrupted copies of the schema
        workbook are then each expected to raise ``ValueError``.
        """
        module_path = os.path.join(self.tmp_dirname, 'schema.py')
        data_path = os.path.join(self.tmp_dirname, 'data.xlsx')
        schema_xl = os.path.join(self.tmp_dirname, 'schema.xlsx')
        sheet_name = '!!' + core.SCHEMA_SHEET_NAME

        # move the schema into a sheet with the expected name
        workbook = wc_utils.workbook.io.read(
            'tests/fixtures/declarative_schema/schema*.csv')
        workbook[sheet_name] = workbook.pop('')
        wc_utils.workbook.io.write(schema_xl, workbook)

        def check_round_trip(schema_module):
            # write a parent with two children and compare what is read back
            parent = schema_module.Parent(id='p_0')
            for child_id in ('c_0', 'c_1'):
                parent.children.create(id=child_id)

            obj_tables.io.WorkbookWriter().run(
                data_path, [parent],
                models=[schema_module.Parent, schema_module.Child])
            objs = obj_tables.io.WorkbookReader().run(
                data_path,
                models=[schema_module.Parent, schema_module.Child])
            self.assertTrue(objs[schema_module.Parent][0].is_equal(parent))

        schema, _, _ = utils.init_schema(schema_xl, out_filename=module_path)
        check_round_trip(schema)

        # import module and test
        check_round_trip(utils.get_schema(module_path))

        # invalid schema
        invalid_schema_xl = os.path.join(self.tmp_dirname,
                                         'schema-invalid.xlsx')

        # sheet name without the '!!' prefix
        workbook = wc_utils.workbook.io.read(schema_xl)
        workbook[core.SCHEMA_SHEET_NAME] = workbook.pop(sheet_name)
        wc_utils.workbook.io.write(invalid_schema_xl, workbook)
        with self.assertRaisesRegex(ValueError, 'must contain a sheet'):
            utils.init_schema(invalid_schema_xl, out_filename=module_path)

        # first cell: the type declaration is replaced by an id declaration
        workbook = wc_utils.workbook.io.read(schema_xl)
        type_decl = "type='{}'".format(core.SCHEMA_TABLE_TYPE)
        id_decl = "id='{}'".format('my' + core.SCHEMA_TABLE_TYPE)
        workbook[sheet_name][0][0] = workbook[sheet_name][0][0].replace(
            type_decl, id_decl)
        wc_utils.workbook.io.write(invalid_schema_xl, workbook)
        with self.assertRaisesRegex(ValueError, 'type of the schema must be'):
            utils.init_schema(invalid_schema_xl, out_filename=module_path)

        # '?' appended to the first cell of row index 3
        workbook = wc_utils.workbook.io.read(schema_xl)
        workbook[sheet_name][3][0] = workbook[sheet_name][3][0] + '?'
        wc_utils.workbook.io.write(invalid_schema_xl, workbook)
        with self.assertRaisesRegex(ValueError, 'names must consist of'):
            utils.init_schema(invalid_schema_xl, out_filename=module_path)
예제 #14
0
    def test_validate(self):
        """Exercise the ``/api/validate/`` endpoint of the web service.

        Cases covered, in order: a valid XLSX workbook, a workbook uploaded
        under a name with an invalid extension, a zip of valid CSV files, a
        zip of TSV files with a modified ``!!Child`` cell (request succeeds
        but the dataset is not reported as valid), a zip which mixes CSV and
        TSV files, and an invalid schema.

        Zip archives are written inside ``with`` blocks so that they are
        finalized and closed even if adding a member fails.
        """
        client = web_service.app.test_client()

        def post_validate(schema_path, workbook_path, workbook_name=None):
            # Upload a schema and a workbook and return the response.
            # ``workbook_name`` overrides the file name sent for the workbook.
            if workbook_name is None:
                workbook_name = os.path.basename(workbook_path)
            with open(schema_path, 'rb') as schema_file, \
                    open(workbook_path, 'rb') as wb_file:
                return client.post(
                    '/api/validate/',
                    data={
                        'schema': (schema_file,
                                   os.path.basename(schema_path)),
                        'workbook': (wb_file, workbook_name),
                    })

        def zip_matching_files(zip_path, *patterns):
            # Archive every file which matches the glob patterns, stored
            # under its base name.
            with zipfile.ZipFile(zip_path, mode='w') as zip_file:
                for pattern in patterns:
                    for filename in glob.glob(pattern):
                        zip_file.write(filename,
                                       arcname=os.path.basename(filename))

        schema_filename = os.path.join('tests', 'fixtures',
                                       'declarative_schema', 'schema.csv')
        schema, _, models = utils.init_schema(schema_filename)
        self.assertEqual(set(models), set(utils.get_models(schema).values()))

        # valid XLSX file
        wb_filename = os.path.join(self.tempdir, 'wb.xlsx')
        p_0 = schema.Parent(id='p_0')
        p_0.children.create(id='c_0')
        p_0.children.create(id='c_1')
        io.WorkbookWriter().run(wb_filename, [p_0], models=models)

        rv = post_validate(schema_filename, wb_filename)
        self.assertEqual(rv.status_code, 200)
        self.assertEqual(rv.json, 'The dataset is valid')

        # invalid extension
        rv = post_validate(
            schema_filename, wb_filename,
            workbook_name=os.path.basename(wb_filename) + '-invalid')
        self.assertEqual(rv.status_code, 400)

        # valid csv files
        wb_filename_2 = os.path.join(self.tempdir, '*.csv')
        wb = wc_utils.workbook.io.read(wb_filename)
        wc_utils.workbook.io.write(wb_filename_2, wb)

        wb_filename_3 = os.path.join(self.tempdir, 'wb.zip')
        zip_matching_files(wb_filename_3, wb_filename_2)

        rv = post_validate(schema_filename, wb_filename_3)
        self.assertEqual(rv.status_code, 200)
        self.assertEqual(rv.json, 'The dataset is valid')

        # invalid tsv files
        wb_filename_4 = os.path.join(self.tempdir, '*.tsv')
        wb = wc_utils.workbook.io.read(wb_filename)
        wb['!!Child'][4][0] = 'c_0'
        wc_utils.workbook.io.write(wb_filename_4, wb)

        wb_filename_5 = os.path.join(self.tempdir, 'wb2.zip')
        zip_matching_files(wb_filename_5, wb_filename_4)

        rv = post_validate(schema_filename, wb_filename_5)
        self.assertEqual(rv.status_code, 200)
        self.assertNotEqual(rv.json, 'The dataset is valid')

        # invalid csv and tsv files
        wb_filename_6 = os.path.join(self.tempdir, 'wb3.zip')
        zip_matching_files(wb_filename_6, wb_filename_2, wb_filename_4)

        rv = post_validate(schema_filename, wb_filename_6)
        self.assertEqual(rv.status_code, 400)

        # invalid schema
        schema_filename = os.path.join('tests', 'fixtures',
                                       'declarative_schema',
                                       'invalid-schema.csv')
        rv = post_validate(schema_filename, wb_filename)
        self.assertEqual(rv.status_code, 400)
예제 #15
0
    def test_normalize(self):
        """Exercise the ``/api/normalize/`` endpoint of the web service.

        A workbook holding one ``Parent`` with two children is written to the
        temporary directory and posted together with the schema.  The test
        checks that the XLSX and TSV responses contain an object equal to the
        original, and that an invalid workbook or an invalid schema is
        answered with status 400.
        """
        schema_filename = os.path.join(
            'tests', 'fixtures', 'declarative_schema', 'schema.csv')
        schema, _, models = utils.init_schema(schema_filename)
        self.assertEqual(set(models), set(utils.get_models(schema).values()))

        in_workbook_filename = os.path.join(self.tempdir, 'file1.xlsx')
        p_0 = schema.Parent(id='p_0')
        for child_id in ('c_0', 'c_1'):
            p_0.children.create(id=child_id)
        io.WorkbookWriter().run(in_workbook_filename, [p_0], models=models)

        client = web_service.app.test_client()

        def post_normalize(schema_path, fmt):
            """POST the schema and the input workbook; return the response."""
            with open(schema_path, 'rb') as schema_file, \
                    open(in_workbook_filename, 'rb') as in_workbook_file:
                form = {
                    'schema': (schema_file, os.path.basename(schema_path)),
                    'model': 'Parent',
                    'workbook': (in_workbook_file,
                                 os.path.basename(in_workbook_filename)),
                    'format': fmt,
                }
                return client.post('/api/normalize/', data=form)

        def read_parent(path):
            """Read the workbook(s) at ``path`` and return the first Parent."""
            objs = io.WorkbookReader().run(
                path, models=models, ignore_missing_attributes=True)
            return objs[schema.Parent][0]

        # to xlsx
        rv = post_normalize(schema_filename, 'xlsx')
        self.assertEqual(rv.status_code, 200)
        out_workbook_file = os.path.join(self.tempdir, 'file2.xlsx')
        with open(out_workbook_file, 'wb') as file:
            file.write(rv.data)

        p_0_b = read_parent(out_workbook_file)
        self.assertTrue(p_0_b.is_equal(p_0))

        # to tsv: the response is a zip archive, unpacked next to the inputs
        # and read back through a glob pattern
        rv = post_normalize(schema_filename, 'tsv')
        self.assertEqual(rv.status_code, 200)
        out_workbook_file = os.path.join(self.tempdir, '*.tsv')
        with zipfile.ZipFile(BytesIO(rv.data)) as zip_file:
            zip_file.extractall(self.tempdir)

        p_0_b = read_parent(out_workbook_file)
        self.assertTrue(p_0_b.is_equal(p_0))

        # invalid workbook: rename the Child worksheet (and the class named in
        # its first cell) to Child2, which the posted schema does not define
        wb = wc_utils.workbook.io.read(in_workbook_filename)
        wb['!!Child2'] = wb.pop('!!Child')
        renamed_cell = wb['!!Child2'][0][0].replace("'Child'", "'Child2'")
        wb['!!Child2'][0][0] = renamed_cell
        wc_utils.workbook.io.write(in_workbook_filename, wb)

        rv = post_normalize(schema_filename, 'xlsx')
        self.assertEqual(rv.status_code, 400)

        # invalid schema
        schema_filename = os.path.join(
            'tests', 'fixtures', 'declarative_schema', 'invalid-schema.csv')
        rv = post_normalize(schema_filename, 'xlsx')
        self.assertEqual(rv.status_code, 400)
예제 #16
0
    def test_convert(self):
        """Exercise the ``/api/convert/`` endpoint of the web service.

        A workbook holding one ``Parent`` with two children is converted
        XLSX -> XLSX, XLSX -> multi.csv, XLSX -> JSON and JSON -> YAML; after
        each conversion the result is read back and compared with the original
        object.  Finally, an invalid schema must be answered with status 400.
        """
        schema_filename = os.path.join(
            'tests', 'fixtures', 'declarative_schema', 'schema.csv')
        schema, _, models = utils.init_schema(schema_filename)
        self.assertEqual(set(models), set(utils.get_models(schema).values()))

        workbook_filename_1 = os.path.join(self.tempdir, 'file1.xlsx')
        p_0 = schema.Parent(id='p_0')
        for child_id in ('c_0', 'c_1'):
            p_0.children.create(id=child_id)
        io.WorkbookWriter().run(workbook_filename_1, [p_0], models=models)

        def post_convert(client, schema_path, workbook_path, fmt):
            """POST a schema and a workbook for conversion to ``fmt``."""
            with open(schema_path, 'rb') as schema_file, \
                    open(workbook_path, 'rb') as workbook_file:
                form = {
                    'schema': (schema_file, os.path.basename(schema_path)),
                    'workbook': (workbook_file,
                                 os.path.basename(workbook_path)),
                    'format': fmt,
                }
                return client.post('/api/convert/', data=form)

        def save_response(rv, basename):
            """Write the response body into the temp dir; return its path."""
            path = os.path.join(self.tempdir, basename)
            with open(path, 'wb') as file:
                file.write(rv.data)
            return path

        def load_parent(reader, path):
            """Read ``path`` with ``reader`` and return the first Parent."""
            objs = reader.run(
                path, models=models, ignore_missing_attributes=True)
            return objs[schema.Parent][0]

        # XLSX -> XLSX
        client = web_service.app.test_client()
        rv = post_convert(client, schema_filename, workbook_filename_1, 'xlsx')
        self.assertEqual(rv.status_code, 200)
        workbook_filename_2 = save_response(rv, 'file2.xlsx')

        p_0_b = load_parent(io.WorkbookReader(), workbook_filename_2)
        self.assertTrue(p_0_b.is_equal(p_0))

        # XLSX -> multi.csv
        client = web_service.app.test_client()
        rv = post_convert(client, schema_filename, workbook_filename_1,
                          'multi.csv')
        self.assertEqual(rv.status_code, 200)
        workbook_filename_3 = save_response(rv, 'file3.csv')

        p_0_c = load_parent(io.MultiSeparatedValuesReader(),
                            workbook_filename_3)
        self.assertTrue(p_0_c.is_equal(p_0))

        # XLSX -> JSON
        client = web_service.app.test_client()
        rv = post_convert(client, schema_filename, workbook_filename_1, 'json')
        self.assertEqual(rv.status_code, 200)
        workbook_filename_4 = save_response(rv, 'file4.json')

        p_0_d = load_parent(io.JsonReader(), workbook_filename_4)
        self.assertTrue(p_0_d.is_equal(p_0))

        # JSON -> YAML (the YAML output is read back with JsonReader as well)
        client = web_service.app.test_client()
        rv = post_convert(client, schema_filename, workbook_filename_4, 'yml')
        self.assertEqual(rv.status_code, 200)
        workbook_filename_5 = save_response(rv, 'file5.yml')

        p_0_e = load_parent(io.JsonReader(), workbook_filename_5)
        self.assertTrue(p_0_e.is_equal(p_0))

        # invalid schema
        schema_filename = os.path.join(
            'tests', 'fixtures', 'declarative_schema', 'invalid-schema.csv')
        rv = post_convert(client, schema_filename, workbook_filename_1, 'xlsx')
        self.assertEqual(rv.status_code, 400)
예제 #17
0
    def test_diff(self):
        """Exercise the ``/api/diff/`` endpoint of the web service.

        Two workbooks are written that differ only in the name of child
        ``c_1``.  Diffing a workbook against itself must return an empty JSON
        list, diffing the two workbooks must return a non-empty one, and an
        invalid workbook or an invalid schema must be answered with
        status 400.
        """
        schema_filename = os.path.join(
            'tests', 'fixtures', 'declarative_schema', 'schema.csv')
        schema, _, models = utils.init_schema(schema_filename)
        self.assertEqual(set(models), set(utils.get_models(schema).values()))

        def write_parent(path, children):
            """Write a Parent 'p_0' with the given (id, name) children."""
            p_0 = schema.Parent(id='p_0')
            for child_id, child_name in children:
                p_0.children.create(id=child_id, name=child_name)
            io.WorkbookWriter().run(path, [p_0], models=models)

        xl_file_1 = os.path.join(self.tempdir, 'file1.xlsx')
        write_parent(xl_file_1, [('c_0', 'c_0'), ('c_1', 'c_1')])

        xl_file_2 = os.path.join(self.tempdir, 'file2.xlsx')
        write_parent(xl_file_2, [('c_0', 'c_0'), ('c_1', 'c_0')])

        client = web_service.app.test_client()

        def post_diff(schema_path, wb_path_1, wb_path_2):
            """POST a schema and two workbooks; return the response."""
            with open(schema_path, 'rb') as schema_file, \
                    open(wb_path_1, 'rb') as wb_file_1, \
                    open(wb_path_2, 'rb') as wb_file_2:
                form = {
                    'schema': (schema_file, os.path.basename(schema_path)),
                    'model': 'Parent',
                    'workbook': (wb_file_1, os.path.basename(wb_path_1)),
                    'workbook-2': (wb_file_2, os.path.basename(wb_path_2)),
                }
                return client.post('/api/diff/', data=form)

        # a workbook compared with itself has no differences
        rv = post_diff(schema_filename, xl_file_1, xl_file_1)
        self.assertEqual(rv.status_code, 200)
        self.assertEqual(rv.json, [])

        # the two workbooks differ in the name of child c_1
        rv = post_diff(schema_filename, xl_file_1, xl_file_2)
        self.assertEqual(rv.status_code, 200)
        self.assertNotEqual(rv.json, [])

        # invalid workbook: rename the Child worksheet (and the class named in
        # its first cell) to Child2, which the posted schema does not define
        wb = wc_utils.workbook.io.read(xl_file_2)
        wb['!!Child2'] = wb.pop('!!Child')
        renamed_cell = wb['!!Child2'][0][0].replace("'Child'", "'Child2'")
        wb['!!Child2'][0][0] = renamed_cell
        wc_utils.workbook.io.write(xl_file_2, wb)

        rv = post_diff(schema_filename, xl_file_1, xl_file_2)
        self.assertEqual(rv.status_code, 400)

        # invalid schema
        schema_filename = os.path.join(
            'tests', 'fixtures', 'declarative_schema', 'invalid-schema.csv')
        rv = post_diff(schema_filename, xl_file_1, xl_file_2)
        self.assertEqual(rv.status_code, 400)
예제 #18
0
    def test_init_schema_with_inheritance(self):
        """Check that ``utils.init_schema`` builds inherited classes.

        Two class hierarchies (``ClsA*`` and ``ClsB*``) are written to a CSV
        schema.  The test verifies attribute inheritance and the subclass
        relationships for the schema created directly from the CSV file, for
        the generated Python module, and for a CSV file whose rows are written
        in random order.
        """
        csv_filename = os.path.join(self.tmp_dirname, 'schema.csv')
        py_filename = os.path.join(self.tmp_dirname, 'schema.py')

        ws_metadata = ' '.join([
            '!!ObjTables',
            "type='{}'".format(core.SCHEMA_TABLE_TYPE),
            "description='Schema'",
            "objTablesVersion='{}'".format(obj_tables.__version__),
        ])
        col_headings = ','.join(
            ['!Name', '!Type', '!Parent', '!Format', '!Description'])

        # one tuple per schema row: name, type, parent, format, description
        row_cells = [
            ('ClsA1', 'Class', '', 'row', ''),
            ('id_a1', 'Attribute', 'ClsA1', 'String', 'Id-a1'),
            ('ClsA2', 'Class', 'ClsA1', 'row', ''),
            ('id_a2', 'Attribute', 'ClsA2', 'String', 'Id-a2'),
            ('ClsA30', 'Class', 'ClsA2', 'row', ''),
            ('id_a30', 'Attribute', 'ClsA30', 'String', 'Id-a30'),
            ('ClsA31', 'Class', 'ClsA2', 'row', ''),
            ('id_a31', 'Attribute', 'ClsA31', 'String', 'Id-a31'),
            ('ClsB1', 'Class', '', 'row', ''),
            ('id_b1', 'Attribute', 'ClsB1', 'String', 'Id-b1'),
            ('ClsB2', 'Class', 'ClsB1', 'row', ''),
            ('id_b2', 'Attribute', 'ClsB2', 'String', 'Id-b2'),
            ('ClsB30', 'Class', 'ClsB2', 'row', ''),
            ('id_b30', 'Attribute', 'ClsB30', 'String', 'Id-b30'),
            ('ClsB31', 'Class', 'ClsB2', 'row', ''),
            ('id_b31', 'Attribute', 'ClsB31', 'String', 'Id-b31'),
        ]
        rows = [','.join(cells) + '\n' for cells in row_cells]

        def write_schema(lines):
            """Write the metadata line, the headings and the given rows."""
            with open(csv_filename, 'w') as file:
                file.write(ws_metadata + '\n')
                file.write(col_headings + '\n')
                file.writelines(lines)

        def make_b31(module):
            """Create a ClsB31 and check its own and inherited attributes."""
            obj = module.ClsB31(id_b1='l1', id_b2='l2', id_b31='l3')
            self.assertEqual(obj.id_b1, 'l1')
            self.assertEqual(obj.id_b2, 'l2')
            self.assertEqual(obj.id_b31, 'l3')
            return obj

        write_schema(rows)

        schema, _, _ = utils.init_schema(csv_filename,
                                         out_filename=py_filename)
        b31 = make_b31(schema)
        for ancestor in (schema.ClsB31, schema.ClsB2, schema.ClsB1,
                         obj_tables.Model):
            self.assertIsInstance(b31, ancestor)
        self.assertNotIsInstance(b31, schema.ClsA31)

        self.assertEqual(set(get_subclasses(schema.ClsA1)),
                         {schema.ClsA2, schema.ClsA30, schema.ClsA31})
        self.assertEqual(set(get_subclasses(schema.ClsA2)),
                         {schema.ClsA30, schema.ClsA31})
        self.assertEqual(set(get_subclasses(schema.ClsA30)), set())

        # check that Python file generated correctly
        schema = utils.get_schema(py_filename)
        b31 = make_b31(schema)
        self.assertEqual(set(get_subclasses(schema.ClsA1)),
                         {schema.ClsA2, schema.ClsA30, schema.ClsA31})

        # check that definition works with random order of rows
        write_schema(rows[i_row]
                     for i_row in numpy.random.permutation(len(rows)))

        schema, _, _ = utils.init_schema(csv_filename)
        b31 = make_b31(schema)
        self.assertEqual(set(get_subclasses(schema.ClsA1)),
                         {schema.ClsA2, schema.ClsA30, schema.ClsA31})
예제 #19
0
    def test_init_schema_errors(self):
        """Check that ``utils.init_schema`` raises ``ValueError`` for bad schemas.

        Each case below (re)writes ``schema.csv`` in the temporary directory
        with the same metadata line and column headings followed by rows that
        are expected to be rejected, then asserts on the error message.
        """
        # a file name with a '.txt' extension
        with self.assertRaisesRegex(ValueError, 'format is not supported'):
            utils.init_schema(os.path.join(self.tmp_dirname, 'schema.txt'))

        filename = os.path.join(self.tmp_dirname, 'schema.csv')
        # first line of the file: space-separated table metadata
        ws_metadata = [
            '!!ObjTables',
            "type='{}'".format(core.SCHEMA_TABLE_TYPE),
            "description='Schema'",
            "objTablesVersion='{}'".format(obj_tables.__version__),
        ]

        # second line of the file: comma-separated column headings
        col_headings = [
            '!Name',
            '!Type',
            '!Parent',
            '!Format',
            '!Description',
        ]

        # two classes that name each other as parent
        with open(filename, 'w') as file:
            file.write('{}\n'.format(' '.join(ws_metadata)))
            file.write('{}\n'.format(','.join(col_headings)))
            file.write('{}\n'.format(','.join(
                ['Cls1', 'Class', 'Cls2', 'column', ''])))
            file.write('{}\n'.format(','.join(
                ['Cls2', 'Class', 'Cls1', 'column', ''])))
        with self.assertRaisesRegex(ValueError, 'must be acyclic'):
            utils.init_schema(filename)

        # the same attribute of the same class listed twice
        with open(filename, 'w') as file:
            file.write('{}\n'.format(' '.join(ws_metadata)))
            file.write('{}\n'.format(','.join(col_headings)))
            file.write('{}\n'.format(','.join(
                ['Attr1', 'Attribute', 'Cls1', 'String', 'attr1'])))
            file.write('{}\n'.format(','.join(
                ['Attr1', 'Attribute', 'Cls1', 'String', 'attr2'])))
        with self.assertRaisesRegex(ValueError, 'can only be defined once'):
            utils.init_schema(filename)

        # a class whose name begins with a digit
        with open(filename, 'w') as file:
            file.write('{}\n'.format(' '.join(ws_metadata)))
            file.write('{}\n'.format(','.join(col_headings)))
            file.write('{}\n'.format(','.join(['1Cls', 'Class', '', 'row',
                                               ''])))
        with self.assertRaisesRegex(ValueError, 'must start'):
            utils.init_schema(filename)

        # an attribute whose parent class name begins with a digit
        with open(filename, 'w') as file:
            file.write('{}\n'.format(' '.join(ws_metadata)))
            file.write('{}\n'.format(','.join(col_headings)))
            file.write('{}\n'.format(','.join(
                ['Attr1', 'Attribute', '1Cls', 'String', 'attr2'])))
        with self.assertRaisesRegex(ValueError, 'must start'):
            utils.init_schema(filename)

        # the same class listed twice
        with open(filename, 'w') as file:
            file.write('{}\n'.format(' '.join(ws_metadata)))
            file.write('{}\n'.format(','.join(col_headings)))
            file.write('{}\n'.format(','.join(['Cls1', 'Class', '', 'row',
                                               ''])))
            file.write('{}\n'.format(','.join(['Cls1', 'Class', '', 'row',
                                               ''])))
        with self.assertRaisesRegex(ValueError, 'can only be defined once'):
            utils.init_schema(filename)

        # a row whose '!Type' value is 'Unsupported'
        with open(filename, 'w') as file:
            file.write('{}\n'.format(' '.join(ws_metadata)))
            file.write('{}\n'.format(','.join(col_headings)))
            file.write('{}\n'.format(','.join(
                ['Cls1', 'Unsupported', 'Doc', 'column', ''])))
        with self.assertRaisesRegex(ValueError, 'is not supported'):
            utils.init_schema(filename)