def test_various_numbers(self):
    """Save a single numeric column and compare the written text.

    The column mixes signed zeros, NaN, whole numbers, exponent-form
    values and one value with more than 15 significant digits. The
    expected file shows NaN as an empty cell and the long value rounded.
    """
    values = [
        0.0, -0.0, np.nan, 1.0, 42.0, -33.0, 1e-10, 1.5e15, 0.0003,
        -4.234,
        # Exceeds 15 digits of precision, so the saved value is
        # expected to be rounded.
        12.34567891234567,
    ]
    ids = pd.Index(
        ['id1', 'id2', 'id3', 'id4', 'id5', 'id6', 'id7', 'id8', 'id9',
         'id10', 'id11'],
        name='ID')
    frame = pd.DataFrame({'numbers': values}, index=ids)

    Metadata(frame).save(self.filepath)

    with open(self.filepath, 'r') as handle:
        written = handle.read()

    expected = ''.join([
        "ID\tnumbers\n",
        "#q2:types\tnumeric\n",
        "id1\t0\n",
        "id2\t-0\n",
        "id3\t\n",
        "id4\t1\n",
        "id5\t42\n",
        "id6\t-33\n",
        "id7\t1e-10\n",
        "id8\t1.5e+15\n",
        "id9\t0.0003\n",
        "id10\t-4.234\n",
        "id11\t12.3456789123457\n",
    ])
    self.assertEqual(written, expected)
def test_ids_and_column_names_as_numeric_strings(self):
    """Save metadata whose IDs and column names are numeric-looking strings.

    The expected file keeps the ID and header strings exactly as given
    (e.g. '0.004000'), while the float cell values are written without a
    trailing '.0'.
    """
    ids = pd.Index(['0.000001', '0.004000', '0.000000'],
                   dtype=object, name='id')
    headers = ['42.0', '1000', '-4.2']
    rows = [
        [2.0, 'b', 2.5],
        [1.0, 'b', 4.2],
        [3.0, 'c', -9.999],
    ]
    frame = pd.DataFrame(rows, index=ids, columns=headers)

    Metadata(frame).save(self.filepath)

    with open(self.filepath, 'r') as handle:
        written = handle.read()

    expected = ''.join([
        "id\t42.0\t1000\t-4.2\n",
        "#q2:types\tnumeric\tcategorical\tnumeric\n",
        "0.000001\t2\tb\t2.5\n",
        "0.004000\t1\tb\t4.2\n",
        "0.000000\t3\tc\t-9.999\n",
    ])
    self.assertEqual(written, expected)
def test_minimal(self):
    """Save metadata with one ID and no columns, then check the file text."""
    only_id = pd.Index(['my-id'], name='id')
    frame = pd.DataFrame({}, index=only_id)

    Metadata(frame).save(self.filepath)

    with open(self.filepath, 'r') as handle:
        written = handle.read()

    # Header row, the '#q2:types' row, then the single ID.
    expected = ''.join([
        "id\n",
        "#q2:types\n",
        "my-id\n",
    ])
    self.assertEqual(written, expected)
def test_no_columns(self):
    """Save metadata with three IDs and no columns, then check the file."""
    ids = pd.Index(['foo', 'bar', 'baz'], name='id')
    frame = pd.DataFrame({}, index=ids)

    Metadata(frame).save(self.filepath)

    with open(self.filepath, 'r') as handle:
        written = handle.read()

    # Only the ID column is present, so every row holds a single field.
    expected = ''.join([
        "id\n",
        "#q2:types\n",
        "foo\n",
        "bar\n",
        "baz\n",
    ])
    self.assertEqual(written, expected)
def test_no_bom(self):
    """Check that the saved file's first two bytes are b'id'.

    Reading in binary mode shows that nothing (such as a byte-order
    mark) precedes the ID header.
    """
    ids = pd.Index(['id1', 'id2', 'id3'], name='id')
    columns = {
        'col1': [1.0, 2.0, 3.0],
        'col2': ['a', 'b', 'c'],
        'col3': ['foo', 'bar', '42'],
    }
    frame = pd.DataFrame(columns, index=ids)

    Metadata(frame).save(self.filepath)

    with open(self.filepath, 'rb') as handle:
        leading_bytes = handle.read(2)

    self.assertEqual(leading_bytes, b'id')
def test_minimal(self):
    """A one-ID, zero-column metadata object saves as three short lines."""
    metadata = Metadata(
        pd.DataFrame({}, index=pd.Index(['my-id'], name='id')))
    metadata.save(self.filepath)

    with open(self.filepath, 'r') as saved_file:
        contents = saved_file.read()

    expected_lines = [
        "id\n",
        "#q2:types\n",
        "my-id\n",
    ]
    self.assertEqual(contents, ''.join(expected_lines))
def test_no_bom(self):
    """The raw file must begin with the bytes of the 'id' header."""
    frame = pd.DataFrame(
        {'col1': [1.0, 2.0, 3.0],
         'col2': ['a', 'b', 'c'],
         'col3': ['foo', 'bar', '42']},
        index=pd.Index(['id1', 'id2', 'id3'], name='id'))
    metadata = Metadata(frame)
    metadata.save(self.filepath)

    # Binary mode so that no decoding hides any leading bytes.
    with open(self.filepath, 'rb') as saved_file:
        prefix = saved_file.read(2)

    self.assertEqual(prefix, b'id')
def test_single_id(self):
    """Save one ID with a string column and a float column.

    The index is named 'featureid', and the expected header row starts
    with that name.
    """
    only_id = pd.Index(['my-id'], name='featureid')
    frame = pd.DataFrame({'col1': ['foo'], 'col2': [4.002]}, index=only_id)

    Metadata(frame).save(self.filepath)

    with open(self.filepath, 'r') as handle:
        written = handle.read()

    expected = ''.join([
        "featureid\tcol1\tcol2\n",
        "#q2:types\tcategorical\tnumeric\n",
        "my-id\tfoo\t4.002\n",
    ])
    self.assertEqual(written, expected)
def test_single_column(self):
    """Save one column of numeric-looking strings.

    The values are Python strings, and the expected file lists the
    column as categorical with each string written unchanged (including
    the trailing zeros of '4.4000').
    """
    ids = pd.Index(['foo', 'bar', 'baz'], name='id')
    frame = pd.DataFrame({'col1': ['42', '4.3', '4.4000']}, index=ids)

    Metadata(frame).save(self.filepath)

    with open(self.filepath, 'r') as handle:
        written = handle.read()

    expected = ''.join([
        "id\tcol1\n",
        "#q2:types\tcategorical\n",
        "foo\t42\n",
        "bar\t4.3\n",
        "baz\t4.4000\n",
    ])
    self.assertEqual(written, expected)
def test_unsorted_column_order(self):
    """Columns given as 'z', 'b', 'y' are expected in that same order."""
    ids = pd.Index(['id1', 'id2', 'id3'], name='id')
    headers = ['z', 'b', 'y']
    rows = [
        [1.0, 'a', 'foo'],
        [2.0, 'b', 'bar'],
        [3.0, 'c', '42'],
    ]
    frame = pd.DataFrame(rows, index=ids, columns=headers)

    Metadata(frame).save(self.filepath)

    with open(self.filepath, 'r') as handle:
        written = handle.read()

    expected = ''.join([
        "id\tz\tb\ty\n",
        "#q2:types\tnumeric\tcategorical\tcategorical\n",
        "id1\t1\ta\tfoo\n",
        "id2\t2\tb\tbar\n",
        "id3\t3\tc\t42\n",
    ])
    self.assertEqual(written, expected)
def test_no_columns(self):
    """Three IDs and no columns save as a header, types row and ID rows."""
    metadata = Metadata(
        pd.DataFrame({}, index=pd.Index(['foo', 'bar', 'baz'], name='id')))
    metadata.save(self.filepath)

    with open(self.filepath, 'r') as saved_file:
        contents = saved_file.read()

    expected_lines = [
        "id\n",
        "#q2:types\n",
        "foo\n",
        "bar\n",
        "baz\n",
    ]
    self.assertEqual(contents, ''.join(expected_lines))
def test_single_column(self):
    """A lone string column is expected to be saved as categorical."""
    frame = pd.DataFrame(
        {'col1': ['42', '4.3', '4.4000']},
        index=pd.Index(['foo', 'bar', 'baz'], name='id'))
    metadata = Metadata(frame)
    metadata.save(self.filepath)

    with open(self.filepath, 'r') as saved_file:
        contents = saved_file.read()

    # The string values must come back exactly as supplied.
    expected_lines = [
        "id\tcol1\n",
        "#q2:types\tcategorical\n",
        "foo\t42\n",
        "bar\t4.3\n",
        "baz\t4.4000\n",
    ]
    self.assertEqual(contents, ''.join(expected_lines))
def test_some_missing_data(self):
    """Save columns that contain some NaN values.

    Both the float column and the string column hold NaN entries; the
    expected file has an empty cell wherever a NaN was supplied.
    """
    ids = pd.Index(['id1', 'id2', 'id3'], name='id')
    columns = {
        'col1': [42.0, np.nan, -3.5],
        'col2': ['a', np.nan, np.nan],
    }
    frame = pd.DataFrame(columns, index=ids)

    Metadata(frame).save(self.filepath)

    with open(self.filepath, 'r') as handle:
        written = handle.read()

    expected = ''.join([
        "id\tcol1\tcol2\n",
        "#q2:types\tnumeric\tcategorical\n",
        "id1\t42\ta\n",
        "id2\t\t\n",
        "id3\t-3.5\t\n",
    ])
    self.assertEqual(written, expected)
def test_alternate_id_header(self):
    """An index named '#SampleID' is expected as the first header field."""
    ids = pd.Index(['id1', 'id2', 'id3'], name='#SampleID')
    columns = {
        'col1': [1.0, 2.0, 3.0],
        'col2': ['a', 'b', 'c'],
        'col3': ['foo', 'bar', '42'],
    }
    frame = pd.DataFrame(columns, index=ids)

    Metadata(frame).save(self.filepath)

    with open(self.filepath, 'r') as handle:
        written = handle.read()

    expected = ''.join([
        "#SampleID\tcol1\tcol2\tcol3\n",
        "#q2:types\tnumeric\tcategorical\tcategorical\n",
        "id1\t1\ta\tfoo\n",
        "id2\t2\tb\tbar\n",
        "id3\t3\tc\t42\n",
    ])
    self.assertEqual(written, expected)
def test_different_file_extension(self):
    """Save to a 'metadata.txt' path and check the written contents."""
    ids = pd.Index(['id1', 'id2', 'id3'], name='id')
    columns = {
        'col1': [1.0, 2.0, 3.0],
        'col2': ['a', 'b', 'c'],
        'col3': ['foo', 'bar', '42'],
    }
    frame = pd.DataFrame(columns, index=ids)

    # Uses a path under temp_dir rather than self.filepath.
    target = os.path.join(self.temp_dir, 'metadata.txt')
    Metadata(frame).save(target)

    with open(target, 'r') as handle:
        written = handle.read()

    expected = ''.join([
        "id\tcol1\tcol2\tcol3\n",
        "#q2:types\tnumeric\tcategorical\tcategorical\n",
        "id1\t1\ta\tfoo\n",
        "id2\t2\tb\tbar\n",
        "id3\t3\tc\t42\n",
    ])
    self.assertEqual(written, expected)
def test_all_missing_data(self):
    """Save two columns made up entirely of NaN.

    'col1' is a plain list of NaN and 'col2' is an object-dtype array of
    NaN; the expected file lists them as numeric and categorical with
    every cell empty.
    """
    ids = pd.Index(['id1', 'id2', 'id3'], name='id')
    all_nan_floats = [np.nan, np.nan, np.nan]
    all_nan_objects = np.array([np.nan, np.nan, np.nan], dtype=object)
    frame = pd.DataFrame(
        {'col1': all_nan_floats, 'col2': all_nan_objects}, index=ids)

    Metadata(frame).save(self.filepath)

    with open(self.filepath, 'r') as handle:
        written = handle.read()

    expected = ''.join([
        "id\tcol1\tcol2\n",
        "#q2:types\tnumeric\tcategorical\n",
        "id1\t\t\n",
        "id2\t\t\n",
        "id3\t\t\n",
    ])
    self.assertEqual(written, expected)
def test_all_missing_data(self):
    """NaN-only columns (one float list, one object array) save as blanks."""
    frame = pd.DataFrame(
        {'col1': [np.nan, np.nan, np.nan],
         'col2': np.array([np.nan, np.nan, np.nan], dtype=object)},
        index=pd.Index(['id1', 'id2', 'id3'], name='id'))
    metadata = Metadata(frame)
    metadata.save(self.filepath)

    with open(self.filepath, 'r') as saved_file:
        contents = saved_file.read()

    # Each data row is the ID followed by two empty fields.
    expected_lines = [
        "id\tcol1\tcol2\n",
        "#q2:types\tnumeric\tcategorical\n",
        "id1\t\t\n",
        "id2\t\t\n",
        "id3\t\t\n",
    ]
    self.assertEqual(contents, ''.join(expected_lines))
def test_unsorted_column_order(self):
    """The saved header must list columns as supplied: z, b, y."""
    frame = pd.DataFrame(
        [[1.0, 'a', 'foo'],
         [2.0, 'b', 'bar'],
         [3.0, 'c', '42']],
        index=pd.Index(['id1', 'id2', 'id3'], name='id'),
        columns=['z', 'b', 'y'])
    metadata = Metadata(frame)
    metadata.save(self.filepath)

    with open(self.filepath, 'r') as saved_file:
        contents = saved_file.read()

    expected_lines = [
        "id\tz\tb\ty\n",
        "#q2:types\tnumeric\tcategorical\tcategorical\n",
        "id1\t1\ta\tfoo\n",
        "id2\t2\tb\tbar\n",
        "id3\t3\tc\t42\n",
    ]
    self.assertEqual(contents, ''.join(expected_lines))
def test_save_metadata_auto_extension(self):
    """Check the basename of the path returned by ``save``.

    Each case saves to a filename under ``temp_dir``, optionally passing
    an extension as the second positional argument, and compares the
    basename of the returned path with the expected filename.
    """
    frame = pd.DataFrame(
        {'col1': [1.0, 2.0, 3.0],
         'col2': ['a', 'b', 'c'],
         'col3': ['foo', 'bar', '42']},
        index=pd.Index(['id1', 'id2', 'id3'], name='id'))
    md = Metadata(frame)

    # (filename, extra positional args for save, expected basename).
    # An empty tuple means save is called with the path only.
    cases = [
        # Filename & extension endswith is matching (non-default).
        ('metadatatsv', ('.tsv',), 'metadatatsv.tsv'),
        # No period in filename; no extension included.
        ('metadata', (), 'metadata'),
        # No period in filename; no period in extension.
        ('metadata', ('tsv',), 'metadata.tsv'),
        # No period in filename; multiple periods in extension.
        ('metadata', ('..tsv',), 'metadata.tsv'),
        # Single period in filename; no period in extension.
        ('metadata.', ('tsv',), 'metadata.tsv'),
        # Single period in filename; single period in extension.
        ('metadata.', ('.tsv',), 'metadata.tsv'),
        # Single period in filename; multiple periods in extension.
        ('metadata.', ('..tsv',), 'metadata.tsv'),
        # Multiple periods in filename; single period in extension.
        ('metadata..', ('.tsv',), 'metadata.tsv'),
        # Multiple periods in filename; multiple periods in extension.
        ('metadata..', ('..tsv',), 'metadata.tsv'),
        # No extension in filename; no extension input.
        ('metadata', (), 'metadata'),
        # No extension in filename; extension input.
        ('metadata', ('.tsv',), 'metadata.tsv'),
        # Extension in filename; no extension input.
        ('metadata.tsv', (), 'metadata.tsv'),
        # Extension in filename; extension input (non-matching).
        ('metadata.tsv', ('.txt',), 'metadata.tsv.txt'),
        # Extension in filename; extension input (matching).
        ('metadata.tsv', ('.tsv',), 'metadata.tsv'),
    ]

    for filename, extension_args, expected_name in cases:
        target = os.path.join(self.temp_dir, filename)
        saved_path = md.save(target, *extension_args)
        self.assertEqual(os.path.basename(saved_path), expected_name)