def test_3_columns(self):
    # The '3-column.tsv' fixture is expected to load as string-valued
    # 'Taxon' and 'Confidence' columns indexed by 'Feature ID'.
    rel_path = os.path.join('taxonomy', '3-column.tsv')
    feature_ids = pd.Index(['seq1', 'seq2'], name='Feature ID',
                           dtype=object)
    rows = [['k__Foo; p__Bar', '-1.0'],
            ['k__Foo; p__Baz', '-42.0']]
    expected = pd.DataFrame(rows, index=feature_ids,
                            columns=['Taxon', 'Confidence'], dtype=object)

    # Empty kwargs exercise has_header=None (default); the second pass
    # states has_header=True explicitly. Both must give the same frame.
    for kwargs in ({}, {'has_header': True}):
        observed = _taxonomy_formats_to_dataframe(
            self.get_data_path(rel_path), **kwargs)
        assert_frame_equal(observed, expected)
def test_2_columns(self):
    # The '2-column.tsv' fixture is expected to load as a single
    # string-valued 'Taxon' column indexed by 'Feature ID'.
    rel_path = os.path.join('taxonomy', '2-column.tsv')
    feature_ids = pd.Index(['seq1', 'seq2'], name='Feature ID',
                           dtype=object)
    rows = [['k__Bacteria; p__Proteobacteria'],
            ['k__Bacteria']]
    expected = pd.DataFrame(rows, index=feature_ids, columns=['Taxon'],
                            dtype=object)

    # Empty kwargs exercise has_header=None (default); the second pass
    # states has_header=True explicitly. Both must give the same frame.
    for kwargs in ({}, {'has_header': True}):
        observed = _taxonomy_formats_to_dataframe(
            self.get_data_path(rel_path), **kwargs)
        assert_frame_equal(observed, expected)
def test_3_columns(self):
    # Loads the '3-column.tsv' fixture twice -- once with has_header left
    # at its default and once with has_header=True -- and expects the same
    # two-column ('Taxon', 'Confidence') frame both times.
    expected = pd.DataFrame(
        [['k__Foo; p__Bar', '-1.0'], ['k__Foo; p__Baz', '-42.0']],
        index=pd.Index(['seq1', 'seq2'], name='Feature ID', dtype=object),
        columns=['Taxon', 'Confidence'],
        dtype=object)
    fixture = os.path.join('taxonomy', '3-column.tsv')

    # has_header=None (default)
    default_result = _taxonomy_formats_to_dataframe(
        self.get_data_path(fixture))
    assert_frame_equal(default_result, expected)

    # has_header=True
    headered_result = _taxonomy_formats_to_dataframe(
        self.get_data_path(fixture), has_header=True)
    assert_frame_equal(headered_result, expected)
def test_2_columns(self):
    # Loads the '2-column.tsv' fixture twice -- once with has_header left
    # at its default and once with has_header=True -- and expects the same
    # single-column ('Taxon') frame both times.
    expected = pd.DataFrame(
        [['k__Bacteria; p__Proteobacteria'], ['k__Bacteria']],
        index=pd.Index(['seq1', 'seq2'], name='Feature ID', dtype=object),
        columns=['Taxon'],
        dtype=object)
    fixture = os.path.join('taxonomy', '2-column.tsv')

    # has_header=None (default)
    default_result = _taxonomy_formats_to_dataframe(
        self.get_data_path(fixture))
    assert_frame_equal(default_result, expected)

    # has_header=True
    headered_result = _taxonomy_formats_to_dataframe(
        self.get_data_path(fixture), has_header=True)
    assert_frame_equal(headered_result, expected)
def test_headerless(self):
    # The 'headerless.tsv' fixture is expected to load with 'Taxon' as the
    # first data column and generated 'Unnamed Column N' names for the rest.
    rel_path = os.path.join('taxonomy', 'headerless.tsv')
    feature_ids = pd.Index(['seq1', 'seq2'], name='Feature ID',
                           dtype=object)
    column_names = ['Taxon', 'Unnamed Column 1', 'Unnamed Column 2']
    rows = [['k__Foo; p__Bar', 'some', 'another'],
            ['k__Foo; p__Baz', 'column', 'column!']]
    expected = pd.DataFrame(rows, index=feature_ids, columns=column_names,
                            dtype=object)

    # Empty kwargs exercise has_header=None (default); the second pass
    # states has_header=False explicitly. Both must give the same frame.
    for kwargs in ({}, {'has_header': False}):
        observed = _taxonomy_formats_to_dataframe(
            self.get_data_path(rel_path), **kwargs)
        assert_frame_equal(observed, expected)
def test_headerless(self):
    # Loads the 'headerless.tsv' fixture twice -- once with has_header left
    # at its default and once with has_header=False -- and expects the same
    # frame, with placeholder names for the columns after 'Taxon'.
    expected = pd.DataFrame(
        [['k__Foo; p__Bar', 'some', 'another'],
         ['k__Foo; p__Baz', 'column', 'column!']],
        index=pd.Index(['seq1', 'seq2'], name='Feature ID', dtype=object),
        columns=['Taxon', 'Unnamed Column 1', 'Unnamed Column 2'],
        dtype=object)
    fixture = os.path.join('taxonomy', 'headerless.tsv')

    # has_header=None (default)
    default_result = _taxonomy_formats_to_dataframe(
        self.get_data_path(fixture))
    assert_frame_equal(default_result, expected)

    # has_header=False
    headerless_result = _taxonomy_formats_to_dataframe(
        self.get_data_path(fixture), has_header=False)
    assert_frame_equal(headerless_result, expected)
def test_valid_but_messy_file(self):
    # The 'valid-but-messy.tsv' fixture is expected to load cleanly into
    # 'Taxon' and 'Extra Column' string columns indexed by 'Feature ID'.
    rel_path = os.path.join('taxonomy', 'valid-but-messy.tsv')
    feature_ids = pd.Index(['SEQUENCE1', 'seq2'], name='Feature ID',
                           dtype=object)
    rows = [['k__Bar; p__Baz', 'foo'],
            ['some; taxonomy; for; ya', 'bar baz']]
    expected = pd.DataFrame(rows, index=feature_ids,
                            columns=['Taxon', 'Extra Column'], dtype=object)

    # Empty kwargs exercise has_header=None (default); the second pass
    # states has_header=True explicitly. Both must give the same frame.
    for kwargs in ({}, {'has_header': True}):
        observed = _taxonomy_formats_to_dataframe(
            self.get_data_path(rel_path), **kwargs)
        assert_frame_equal(observed, expected)
def test_valid_but_messy_file(self):
    # Loads the 'valid-but-messy.tsv' fixture twice -- once with has_header
    # left at its default and once with has_header=True -- and expects the
    # same ('Taxon', 'Extra Column') frame both times.
    expected = pd.DataFrame(
        [['k__Bar; p__Baz', 'foo'],
         ['some; taxonomy; for; ya', 'bar baz']],
        index=pd.Index(['SEQUENCE1', 'seq2'], name='Feature ID',
                       dtype=object),
        columns=['Taxon', 'Extra Column'],
        dtype=object)
    fixture = os.path.join('taxonomy', 'valid-but-messy.tsv')

    # has_header=None (default)
    default_result = _taxonomy_formats_to_dataframe(
        self.get_data_path(fixture))
    assert_frame_equal(default_result, expected)

    # has_header=True
    headered_result = _taxonomy_formats_to_dataframe(
        self.get_data_path(fixture), has_header=True)
    assert_frame_equal(headered_result, expected)
def test_duplicate_columns(self):
    # Loading the 'duplicate-columns.tsv' fixture must raise a ValueError
    # whose message matches 'duplicated: Column1'.
    rel_path = os.path.join('taxonomy', 'duplicate-columns.tsv')
    with self.assertRaisesRegex(ValueError, 'duplicated: Column1'):
        _taxonomy_formats_to_dataframe(self.get_data_path(rel_path))
def test_has_header_with_headerless(self):
    # Forcing has_header=True on the 'headerless.tsv' fixture must raise a
    # ValueError whose message matches 'requires a header'.
    rel_path = os.path.join('taxonomy', 'headerless.tsv')
    with self.assertRaisesRegex(ValueError, 'requires a header'):
        _taxonomy_formats_to_dataframe(self.get_data_path(rel_path),
                                       has_header=True)
def test_jagged(self):
    # Loading the 'jagged.tsv' fixture must surface pandas' ParserError.
    rel_path = os.path.join('taxonomy', 'jagged.tsv')
    with self.assertRaises(pandas.errors.ParserError):
        _taxonomy_formats_to_dataframe(self.get_data_path(rel_path))
def test_empty(self):
    # Loading the 'empty' fixture must surface pandas' EmptyDataError.
    rel_path = os.path.join('taxonomy', 'empty')
    with self.assertRaises(pandas.errors.EmptyDataError):
        _taxonomy_formats_to_dataframe(self.get_data_path(rel_path))
def test_header_only(self):
    # Loading the 'header-only.tsv' fixture must raise a ValueError whose
    # message matches 'one row of data'.
    rel_path = os.path.join('taxonomy', 'header-only.tsv')
    with self.assertRaisesRegex(ValueError, 'one row of data'):
        _taxonomy_formats_to_dataframe(self.get_data_path(rel_path))
def test_blanks_and_comments(self):
    # Loading the 'blanks-and-comments' fixture must surface pandas'
    # EmptyDataError.
    rel_path = os.path.join('taxonomy', 'blanks-and-comments')
    # The exception is looked up in the public pandas.errors namespace;
    # pandas.io.common is a private module that no longer re-exports it,
    # so the old lookup can fail with AttributeError before the loader runs.
    with self.assertRaises(pandas.errors.EmptyDataError):
        _taxonomy_formats_to_dataframe(self.get_data_path(rel_path))
def test_one_column(self):
    # Loading the '1-column.tsv' fixture must raise a ValueError whose
    # message matches "two columns, found 1".
    rel_path = os.path.join('taxonomy', '1-column.tsv')
    with self.assertRaisesRegex(ValueError, "two columns, found 1"):
        _taxonomy_formats_to_dataframe(self.get_data_path(rel_path))
def test_one_column(self):
    # The '1-column.tsv' fixture is expected to be rejected with a
    # ValueError matching "two columns, found 1".
    fixture = os.path.join('taxonomy', '1-column.tsv')
    expect_failure = self.assertRaisesRegex(ValueError,
                                            "two columns, found 1")
    with expect_failure:
        _taxonomy_formats_to_dataframe(self.get_data_path(fixture))
def test_blanks_and_comments(self):
    # The 'blanks-and-comments' fixture is expected to be rejected with
    # pandas' EmptyDataError.
    fixture = os.path.join('taxonomy', 'blanks-and-comments')
    # Use the public pandas.errors location: the pandas.io.common alias is
    # private and absent from current pandas releases, where resolving it
    # raises AttributeError instead of exercising the loader.
    with self.assertRaises(pandas.errors.EmptyDataError):
        _taxonomy_formats_to_dataframe(self.get_data_path(fixture))
def test_empty(self):
    # The 'empty' fixture is expected to be rejected with pandas'
    # EmptyDataError.
    fixture = os.path.join('taxonomy', 'empty')
    # Use the public pandas.errors location: the pandas.io.common alias is
    # private and absent from current pandas releases, where resolving it
    # raises AttributeError instead of exercising the loader.
    with self.assertRaises(pandas.errors.EmptyDataError):
        _taxonomy_formats_to_dataframe(self.get_data_path(fixture))
def test_header_only(self):
    # The 'header-only.tsv' fixture is expected to be rejected with a
    # ValueError matching 'one row of data'.
    fixture = os.path.join('taxonomy', 'header-only.tsv')
    expect_failure = self.assertRaisesRegex(ValueError, 'one row of data')
    with expect_failure:
        _taxonomy_formats_to_dataframe(self.get_data_path(fixture))
def test_duplicate_columns(self):
    # The 'duplicate-columns.tsv' fixture is expected to be rejected with a
    # ValueError matching 'duplicated: Column1'.
    fixture = os.path.join('taxonomy', 'duplicate-columns.tsv')
    expect_failure = self.assertRaisesRegex(ValueError,
                                            'duplicated: Column1')
    with expect_failure:
        _taxonomy_formats_to_dataframe(self.get_data_path(fixture))
def test_jagged(self):
    # The 'jagged.tsv' fixture is expected to be rejected with pandas'
    # ParserError.
    fixture = os.path.join('taxonomy', 'jagged.tsv')
    # pandas.io.common.CParserError was renamed to ParserError and the old
    # name has since been removed; the public pandas.errors.ParserError is
    # the supported way to reference the exception.
    with self.assertRaises(pandas.errors.ParserError):
        _taxonomy_formats_to_dataframe(self.get_data_path(fixture))
def test_has_header_with_headerless(self):
    # With has_header=True, the 'headerless.tsv' fixture is expected to be
    # rejected with a ValueError matching 'requires a header'.
    fixture = os.path.join('taxonomy', 'headerless.tsv')
    expect_failure = self.assertRaisesRegex(ValueError, 'requires a header')
    with expect_failure:
        _taxonomy_formats_to_dataframe(
            self.get_data_path(fixture), has_header=True)