Esempio n. 1
0
    def test_duplicate_columns_self_merge(self):
        # Merging a Metadata object with itself is expected to fail with a
        # ValueError whose message names both overlapping columns.
        ids = pd.Index(['id1', 'id2'], name='id')
        frame = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}, index=ids)
        metadata = Metadata(frame)

        expected_message = "columns overlap: 'a', 'b'"
        with self.assertRaisesRegex(ValueError, expected_message):
            metadata.merge(metadata)
Esempio n. 2
0
    def test_index_and_column_merge_order(self):
        # The expected ID order and column order of a merge both follow the
        # order in which the Metadata objects are supplied.
        first = Metadata(pd.DataFrame(
            {'a': [1, 2, 3, 4]},
            index=pd.Index(['id1', 'id2', 'id3', 'id4'], name='id')))
        second = Metadata(pd.DataFrame(
            {'b': [5, 6, 7]},
            index=pd.Index(['id4', 'id3', 'id1'], name='id')))
        third = Metadata(pd.DataFrame(
            {'c': [8, 9, 10]},
            index=pd.Index(['id1', 'id4', 'id3'], name='id')))

        merged = first.merge(second, third)

        expected = Metadata(pd.DataFrame(
            {'a': [1, 3, 4], 'b': [7, 6, 5], 'c': [8, 10, 9]},
            index=pd.Index(['id1', 'id3', 'id4'], name='id')))
        self.assertEqual(merged, expected)

        # Merging in different order produces different ID/column order.
        merged = second.merge(first, third)

        expected = Metadata(pd.DataFrame(
            {'b': [5, 6, 7], 'a': [4, 3, 1], 'c': [9, 10, 8]},
            index=pd.Index(['id4', 'id3', 'id1'], name='id')))
        self.assertEqual(merged, expected)
Esempio n. 3
0
    def test_ids_and_column_names_as_numeric_strings(self):
        # IDs and column names that look like numbers are expected to appear
        # in the saved file exactly as the strings supplied here.
        ids = pd.Index(['0.000001', '0.004000', '0.000000'],
                       dtype=object, name='id')
        column_names = ['42.0', '1000', '-4.2']
        rows = [
            [2.0, 'b', 2.5],
            [1.0, 'b', 4.2],
            [3.0, 'c', -9.999]
        ]
        frame = pd.DataFrame(rows, index=ids, columns=column_names)

        Metadata(frame).save(self.filepath)

        with open(self.filepath, 'r') as handle:
            written = handle.read()

        expected = (
            "id\t42.0\t1000\t-4.2\n"
            "#q2:types\tnumeric\tcategorical\tnumeric\n"
            "0.000001\t2\tb\t2.5\n"
            "0.004000\t1\tb\t4.2\n"
            "0.000000\t3\tc\t-9.999\n"
        )

        self.assertEqual(written, expected)
Esempio n. 4
0
    def test_invalid_header(self):
        # Loading the fixture must raise MetadataFileError mentioning the
        # unrecognized ID column name.
        path = get_data_path('invalid/invalid-header.tsv')
        expected_message = ('unrecognized ID column name.*'
                            'invalid_id_header')

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 5
0
    def test_column_types_unrecognized_column_name(self):
        # A column_types entry for a column that is not in the file must be
        # rejected with MetadataFileError.
        path = get_data_path('valid/simple.tsv')
        overrides = {'not_a_column': 'numeric'}
        expected_message = ('not_a_column.*column_types.*not a column '
                            'in the metadata file')

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path, column_types=overrides)
Esempio n. 6
0
    def test_duplicate_column_names_with_whitespace(self):
        # Loading the fixture must raise MetadataFileError reporting 'col1'
        # as a non-unique column name.
        path = get_data_path(
            'invalid/duplicate-column-names-with-whitespace.tsv')
        expected_message = 'Column names must be unique.*col1'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 7
0
    def test_directive_before_header(self):
        # Loading the fixture must raise MetadataFileError about a
        # #q2:types directive found while searching for the header.
        path = get_data_path('invalid/directive-before-header.tsv')
        expected_message = ('directive.*#q2:types.*searching for '
                            'header')

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 8
0
    def test_qiime1_empty_mapping_file(self):
        # The fixture is resolved through pkg_resources rather than a local
        # helper; loading it must raise MetadataFileError.
        path = pkg_resources.resource_filename(
            'qiime2.metadata.tests', 'data/qiime1-empty.tsv')
        expected_message = 'at least one ID.*empty'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 9
0
    def test_unrecognized_column_type_in_directive(self):
        # Loading the fixture must raise MetadataFileError naming column
        # 'col2', the type 'foo' and the #q2:types directive.
        path = get_data_path('invalid/unrecognized-column-type.tsv')
        expected_message = ('col2.*unrecognized column type.*foo.*'
                            '#q2:types directive')

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 10
0
    def test_unrecognized_directive(self):
        # Loading the fixture must raise MetadataFileError naming the
        # unsupported #q2:foo directive.
        path = get_data_path('invalid/unrecognized-directive.tsv')
        expected_message = ('Unrecognized directive.*#q2:foo.*'
                            '#q2:types directive is supported')

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 11
0
    def test_data_longer_than_header(self):
        # Loading the fixture must raise MetadataFileError reporting a
        # 5-cell row against a 4-cell header.
        path = get_data_path('invalid/data-longer-than-header.tsv')
        expected_message = ('row has 5 cells.*header declares 4 '
                            'cells')

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 12
0
    def test_path_is_directory(self):
        # 'valid' is passed without a file name; loading it must raise
        # MetadataFileError saying the path is not a file.
        directory = get_data_path('valid')
        expected_message = ("path points to something other than a "
                            "file")

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(directory)
Esempio n. 13
0
    def test_comments_and_empty_rows_only(self):
        # Loading the fixture must raise MetadataFileError because no
        # header can be located.
        path = get_data_path('invalid/comments-and-empty-rows-only.tsv')
        expected_message = ('locate header.*only of comments or empty '
                            'rows')

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 14
0
    def test_inner_join(self):
        # The expected merge results contain only IDs present in every
        # Metadata object taking part in the merge.
        left = Metadata(pd.DataFrame(
            {'a': [1, 2, 3], 'b': [4, 5, 6]},
            index=pd.Index(['id1', 'id2', 'id3'], name='id')))
        middle = Metadata(pd.DataFrame(
            {'c': [7, 8, 9], 'd': [10, 11, 12]},
            index=pd.Index(['id2', 'X', 'Y'], name='id')))
        right = Metadata(pd.DataFrame(
            {'e': [13, 14, 15], 'f': [16, 17, 18]},
            index=pd.Index(['X', 'id3', 'id2'], name='id')))

        # Single shared ID.
        merged = left.merge(middle, right)

        expected = Metadata(pd.DataFrame(
            {'a': [2], 'b': [5], 'c': [7], 'd': [10], 'e': [15], 'f': [18]},
            index=pd.Index(['id2'], name='id')))
        self.assertEqual(merged, expected)

        # Multiple shared IDs.
        merged = left.merge(right)

        expected = Metadata(pd.DataFrame(
            {'a': [2, 3], 'b': [5, 6], 'e': [15, 14], 'f': [18, 17]},
            index=pd.Index(['id2', 'id3'], name='id')))
        self.assertEqual(merged, expected)
Esempio n. 15
0
    def test_column_types_override_directive_not_convertible_to_numeric(self):
        # Forcing 'col3' to numeric through column_types must fail with a
        # MetadataFileError listing the values 'bar' and 'foo'.
        path = get_data_path('valid/simple-with-directive.tsv')
        overrides = {'col3': 'numeric'}
        expected_message = ("column 'col3' to numeric.*could not be "
                            "interpreted as numeric: 'bar', 'foo'")

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path, column_types=overrides)
Esempio n. 16
0
    def test_empty_file(self):
        # The fixture is resolved through pkg_resources; loading it must
        # raise MetadataFileError suggesting the file may be empty.
        path = pkg_resources.resource_filename(
            'qiime2.metadata.tests', 'data/empty')
        expected_message = 'locate header.*file may be empty'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 17
0
    def test_various_numbers(self):
        # Pins the exact text written for a range of float values,
        # including negative zero, NaN and exponent notation.
        values = [
            0.0, -0.0, np.nan, 1.0, 42.0, -33.0, 1e-10, 1.5e15, 0.0003, -4.234,
            # This last number should be rounded because it exceeds 15 digits
            # of precision.
            12.34567891234567
        ]
        # One ID per value: 'id1' through 'id11'.
        ids = pd.Index(['id%d' % position for position in range(1, 12)],
                       name='ID')
        frame = pd.DataFrame({'numbers': values}, index=ids)

        Metadata(frame).save(self.filepath)

        with open(self.filepath, 'r') as handle:
            written = handle.read()

        expected = (
            "ID\tnumbers\n"
            "#q2:types\tnumeric\n"
            "id1\t0\n"
            "id2\t-0\n"
            "id3\t\n"
            "id4\t1\n"
            "id5\t42\n"
            "id6\t-33\n"
            "id7\t1e-10\n"
            "id8\t1.5e+15\n"
            "id9\t0.0003\n"
            "id10\t-4.234\n"
            "id11\t12.3456789123457\n"
        )

        self.assertEqual(written, expected)
Esempio n. 18
0
    def test_merging_nothing(self):
        # Calling merge() with no arguments must raise ValueError.
        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=ids)
        metadata = Metadata(frame)

        expected_message = 'At least one Metadata.*nothing to merge'
        with self.assertRaisesRegex(ValueError, expected_message):
            metadata.merge()
Esempio n. 19
0
    def test_numeric_column(self):
        # Round trip: load the fixture, save it, load the saved copy and
        # compare the two Metadata objects.
        original = Metadata.load(get_data_path('valid/numeric-column.tsv'))

        original.save(self.filepath)
        reloaded = Metadata.load(self.filepath)

        self.assertEqual(original, reloaded)
Esempio n. 20
0
    def test_minimal_file(self):
        # Round trip: load the fixture, save it, load the saved copy and
        # compare the two Metadata objects.
        original = Metadata.load(get_data_path('valid/minimal.tsv'))

        original.save(self.filepath)
        reloaded = Metadata.load(self.filepath)

        self.assertEqual(original, reloaded)
Esempio n. 21
0
    def test_all_cells_padded(self):
        # Round trip: load the fixture, save it, load the saved copy and
        # compare the two Metadata objects.
        original = Metadata.load(get_data_path('valid/all-cells-padded.tsv'))

        original.save(self.filepath)
        reloaded = Metadata.load(self.filepath)

        self.assertEqual(original, reloaded)
Esempio n. 22
0
    def test_column_name_conflicts_with_id_header(self):
        # Loading the fixture must raise MetadataFileError because column
        # name 'featureid' conflicts with an ID column header.
        path = get_data_path(
            'invalid/column-name-conflicts-with-id-header.tsv')
        expected_message = ("column name 'featureid' conflicts.*ID "
                            "column header")

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 23
0
    def test_column_types_unrecognized_column_type(self):
        # The upper-case 'CATEGORICAL' given for col2 must be rejected with
        # MetadataFileError.
        path = get_data_path('valid/simple.tsv')
        overrides = {'col1': 'numeric',
                     'col2': 'CATEGORICAL'}
        expected_message = ('col2.*column_types.*unrecognized column '
                            'type.*CATEGORICAL')

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path, column_types=overrides)
Esempio n. 24
0
    def test_non_standard_characters(self):
        # Round trip: load the fixture, save it, load the saved copy and
        # compare the two Metadata objects.
        original = Metadata.load(
            get_data_path('valid/non-standard-characters.tsv'))

        original.save(self.filepath)
        reloaded = Metadata.load(self.filepath)

        self.assertEqual(original, reloaded)
Esempio n. 25
0
    def test_directive_after_directives_section(self):
        # Loading the fixture must raise MetadataFileError about a
        # #q2:types directive outside of the directives section.
        path = get_data_path(
            'invalid/directive-after-directives-section.tsv')
        expected_message = ('#q2:types.*outside of the directives '
                            'section')

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 26
0
    def test_query_by_id(self):
        # The where clause filters on the index name ('id') rather than on
        # one of the data columns.
        ids = pd.Index(['S1', 'S2', 'S3'], name='id')
        frame = pd.DataFrame(
            {'Subject': ['subject-1', 'subject-1', 'subject-2'],
             'SampleType': ['gut', 'tongue', 'gut']},
            index=ids)
        metadata = Metadata(frame)

        matched = metadata.get_ids(where="id='S2' OR id='S1'")

        self.assertEqual(matched, {'S1', 'S2'})
Esempio n. 27
0
    def test_invalid_where(self):
        # A where clause that references an unknown column name must raise
        # ValueError.
        ids = pd.Index(['S1', 'S2', 'S3'], name='sampleid')
        frame = pd.DataFrame(
            {'Subject': ['subject-1', 'subject-1', 'subject-2'],
             'SampleType': ['gut', 'tongue', 'gut']},
            index=ids)
        metadata = Metadata(frame)

        bad_clause = "not-a-column-name='subject-1'"
        with self.assertRaises(ValueError):
            metadata.get_ids(bad_clause)
Esempio n. 28
0
    def test_no_artifacts(self):
        # Both inputs are built directly from DataFrames; the merged
        # result must report an empty artifacts tuple.
        left = Metadata(pd.DataFrame(
            {'a': [1, 2]},
            index=pd.Index(['id1', 'id2'], name='id')))
        right = Metadata(pd.DataFrame(
            {'b': [3, 4]},
            index=pd.Index(['id1', 'id2'], name='id')))

        merged = left.merge(right)

        self.assertEqual(merged.artifacts, ())
Esempio n. 29
0
    def test_duplicate_columns(self):
        # Column 'b' exists in both inputs, so the merge must raise
        # ValueError naming it.
        left = Metadata(pd.DataFrame(
            {'a': [1, 2], 'b': [3, 4]},
            index=pd.Index(['id1', 'id2'], name='id')))
        right = Metadata(pd.DataFrame(
            {'c': [5, 6], 'b': [7, 8]},
            index=pd.Index(['id1', 'id2'], name='id')))

        expected_message = "columns overlap: 'b'"
        with self.assertRaisesRegex(ValueError, expected_message):
            left.merge(right)
Esempio n. 30
0
    def test_empty_result(self):
        # No row has Subject 'subject-3', so the query must return an
        # empty set.
        ids = pd.Index(['S1', 'S2', 'S3'], name='id')
        frame = pd.DataFrame(
            {'Subject': ['subject-1', 'subject-1', 'subject-2'],
             'SampleType': ['gut', 'tongue', 'gut']},
            index=ids)
        metadata = Metadata(frame)

        matched = metadata.get_ids("Subject='subject-3'")

        self.assertEqual(matched, set())
Esempio n. 31
0
    def test_header_only(self):
        # Loading the fixture must raise MetadataFileError asking for at
        # least one ID.
        path = get_data_path('invalid/header-only.tsv')
        expected_message = 'at least one ID'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 32
0
    def test_save_metadata_auto_extension(self):
        # Checks how save() combines the target filename with the optional
        # extension argument. Only the basename of the path returned by
        # save() is compared.
        md = Metadata(
            pd.DataFrame(
                {
                    'col1': [1.0, 2.0, 3.0],
                    'col2': ['a', 'b', 'c'],
                    'col3': ['foo', 'bar', '42']
                },
                index=pd.Index(['id1', 'id2', 'id3'], name='id')))

        # Each case is (filename, extra positional args for save, expected
        # basename). An empty args tuple means save() receives only the path.
        # Cases run in the listed order.
        cases = [
            # Filename & extension endswith is matching (non-default).
            ('metadatatsv', ('.tsv',), 'metadatatsv.tsv'),
            # No period in filename; no extension included.
            ('metadata', (), 'metadata'),
            # No period in filename; no period in extension.
            ('metadata', ('tsv',), 'metadata.tsv'),
            # No period in filename; multiple periods in extension.
            ('metadata', ('..tsv',), 'metadata.tsv'),
            # Single period in filename; no period in extension.
            ('metadata.', ('tsv',), 'metadata.tsv'),
            # Single period in filename; single period in extension.
            ('metadata.', ('.tsv',), 'metadata.tsv'),
            # Single period in filename; multiple periods in extension.
            ('metadata.', ('..tsv',), 'metadata.tsv'),
            # Multiple periods in filename; single period in extension.
            ('metadata..', ('.tsv',), 'metadata.tsv'),
            # Multiple periods in filename; multiple periods in extension.
            ('metadata..', ('..tsv',), 'metadata.tsv'),
            # No extension in filename; no extension input.
            ('metadata', (), 'metadata'),
            # No extension in filename; extension input.
            ('metadata', ('.tsv',), 'metadata.tsv'),
            # Extension in filename; no extension input.
            ('metadata.tsv', (), 'metadata.tsv'),
            # Extension in filename; extension input (non-matching).
            ('metadata.tsv', ('.txt',), 'metadata.tsv.txt'),
            # Extension in filename; extension input (matching).
            ('metadata.tsv', ('.tsv',), 'metadata.tsv'),
        ]

        for filename, save_args, expected_basename in cases:
            target = os.path.join(self.temp_dir, filename)
            saved_path = md.save(target, *save_args)
            saved_basename = os.path.basename(saved_path)

            self.assertEqual(saved_basename, expected_basename)
Esempio n. 33
0
    def test_empty_id(self):
        # Loading the fixture must raise MetadataFileError about an empty
        # metadata ID.
        path = self.get_data_path('invalid/empty-id.tsv')
        expected_message = 'empty metadata ID'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 34
0
    def test_whitespace_only_column_name(self):
        # Loading the fixture must raise MetadataFileError about a column
        # without a name.
        path = get_data_path('invalid/whitespace-only-column-name.tsv')
        expected_message = 'column without a name'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 35
0
    def test_duplicate_column_names(self):
        # Loading the fixture must raise MetadataFileError reporting 'col1'
        # as a non-unique column name.
        path = get_data_path('invalid/duplicate-column-names.tsv')
        expected_message = 'Column names must be unique.*col1'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 36
0
    def test_utf_16_be_file(self):
        # Loading the fixture must raise MetadataFileError mentioning
        # 'UTF-16 Unicode'.
        path = get_data_path('invalid/simple-utf-16be.txt')
        expected_message = 'UTF-16 Unicode'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 37
0
    def test_empty_file(self):
        # Loading the fixture must raise MetadataFileError suggesting the
        # file may be empty.
        path = get_data_path('invalid/empty-file')
        expected_message = 'locate header.*file may be empty'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 38
0
    def test_no_source_artifacts(self):
        # Metadata loaded from a plain file must report an empty artifacts
        # tuple.
        path = get_data_path('valid/simple.tsv')

        loaded = Metadata.load(path)

        self.assertEqual(loaded.artifacts, ())
Esempio n. 39
0
 def test_path_does_not_exist(self):
     # The path below is assumed not to exist on the machine running the
     # tests; loading it must raise MetadataFileError.
     missing_path = '/qiime2/unit/tests/hopefully/this/path/does/not/exist'
     expected_message = "Metadata file path doesn't exist"

     with self.assertRaisesRegex(MetadataFileError, expected_message):
         Metadata.load(missing_path)
Esempio n. 40
0
    def test_leading_trailing_whitespace(self):
        # The fixture must load to the same Metadata as self.simple_md.
        path = get_data_path('valid/leading-trailing-whitespace.tsv')

        loaded = Metadata.load(path)

        self.assertEqual(loaded, self.simple_md)
Esempio n. 41
0
    def test_with_empty_types_directive(self):
        # The fixture must load to the same Metadata as self.simple_md.
        path = get_data_path('valid/empty-types-directive.tsv')

        loaded = Metadata.load(path)

        self.assertEqual(loaded, self.simple_md)
Esempio n. 42
0
    def test_different_file_extension(self):
        # A '.txt' fixture must load to the same Metadata as
        # self.simple_md.
        path = get_data_path('valid/simple.txt')

        loaded = Metadata.load(path)

        self.assertEqual(loaded, self.simple_md)
Esempio n. 43
0
    def test_whitespace_only_id(self):
        # Loading the fixture must raise MetadataFileError about an empty
        # metadata ID.
        path = get_data_path('invalid/whitespace-only-id.tsv')
        expected_message = 'empty metadata ID'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 44
0
    def test_duplicate_directives(self):
        # Loading the fixture must raise MetadataFileError about a
        # duplicate #q2:types directive.
        path = get_data_path('invalid/duplicate-directives.tsv')
        expected_message = 'duplicate directive.*#q2:types'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 45
0
    def test_empty_column_name(self):
        # Loading the fixture must raise MetadataFileError about a column
        # without a name.
        path = self.get_data_path('invalid/empty-column-name.tsv')
        expected_message = 'column without a name'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 46
0
    def test_id_conflicts_with_id_header(self):
        # Loading the fixture must raise MetadataFileError because the ID
        # 'id' conflicts with an ID column header.
        path = get_data_path('invalid/id-conflicts-with-id-header.tsv')
        expected_message = "ID 'id' conflicts.*ID column header"

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 47
0
    def test_trailing_columns(self):
        # The fixture must load to the same Metadata as self.simple_md.
        path = get_data_path('valid/trailing-columns.tsv')

        loaded = Metadata.load(path)

        self.assertEqual(loaded, self.simple_md)
Esempio n. 48
0
    def test_duplicate_ids_with_whitespace(self):
        # Loading the fixture must raise MetadataFileError reporting 'id1'
        # as a non-unique ID.
        path = get_data_path('invalid/duplicate-ids-with-whitespace.tsv')
        expected_message = 'IDs must be unique.*id1'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 49
0
    def test_no_newline_at_eof(self):
        # The fixture must load to the same Metadata as self.simple_md.
        path = get_data_path('valid/no-newline-at-eof.tsv')

        loaded = Metadata.load(path)

        self.assertEqual(loaded, self.simple_md)
Esempio n. 50
0
    def test_empty_rows(self):
        # The fixture must load to the same Metadata as self.simple_md.
        path = get_data_path('valid/empty-rows.tsv')

        loaded = Metadata.load(path)

        self.assertEqual(loaded, self.simple_md)
Esempio n. 51
0
    def test_comments(self):
        # The fixture must load to the same Metadata as self.simple_md.
        path = get_data_path('valid/comments.tsv')

        loaded = Metadata.load(path)

        self.assertEqual(loaded, self.simple_md)
Esempio n. 52
0
    def test_qiime1_empty_mapping_file(self):
        # Loading the fixture must raise MetadataFileError asking for at
        # least one ID.
        path = get_data_path('invalid/qiime1-empty.tsv')
        expected_message = 'at least one ID'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 53
0
    def test_mac_line_endings(self):
        # The fixture must load to the same Metadata as self.simple_md.
        path = get_data_path('valid/mac-line-endings.tsv')

        loaded = Metadata.load(path)

        self.assertEqual(loaded, self.simple_md)
Esempio n. 54
0
    def test_header_only_with_comments_and_empty_rows(self):
        # Loading the fixture must raise MetadataFileError asking for at
        # least one ID.
        path = self.get_data_path(
            'invalid/header-only-with-comments-and-empty-rows.tsv')
        expected_message = 'at least one ID'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)
Esempio n. 55
0
    def test_non_utf_8_file(self):
        # Loading the fixture must raise MetadataFileError saying the file
        # must be encoded as UTF-8 or ASCII.
        path = get_data_path('invalid/non-utf-8.tsv')
        expected_message = 'encoded as UTF-8 or ASCII'

        with self.assertRaisesRegex(MetadataFileError, expected_message):
            Metadata.load(path)