def test_kmer_map_to_metadata(self):
    """Check the ``t._2`` transformation of ``kmer-map.tsv`` into Metadata.

    Verifies the type of the result, the column names and their order,
    the metadata type reported for each column, and the full table
    content returned by ``to_dataframe()``.
    """
    expected_columns = [
        'db-seq', 'seq-name', 'kmer', 'region', 'fwd-primer', 'rev-primer',
        'kmer-length',
    ]
    known = pd.DataFrame(
        data=[
            ['Batman', 'Batman', 'Batman', 'Gotham', 'WANTCAT', 'CATCATCAT',
             50.],
            ['Superman', 'Superman', 'Superman', 'Metropolis', 'CATDAD',
             'DADCAT', 50.],
        ],
        columns=expected_columns,
        index=pd.Index(['0', '1'], name='id'),
    )
    filepath = os.path.join(self.base_dir, 'kmer-map.tsv')
    # Named ``kmer_map`` rather than ``format`` so the builtin is not shadowed.
    kmer_map = KmerMapFormat(filepath, mode='r')

    test = t._2(kmer_map)

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(test, Metadata)
    columns = dict(test.columns)
    npt.assert_array_equal(list(columns), expected_columns)
    for name, column in columns.items():
        # 'kmer-length' is the only column expected to be numeric.
        expected_type = 'numeric' if name == 'kmer-length' else 'categorical'
        self.assertEqual(column.type, expected_type)
    pdt.assert_frame_equal(known, test.to_dataframe())
def test_kmer_map_delayed_frame(self):
    """Check the ``t._3`` transformation of ``kmer-map.tsv`` into a dask frame.

    Verifies that the result is a ``dd.DataFrame`` and that computing it
    yields the expected table indexed by ``db-seq``.
    """
    known = pd.DataFrame(
        data=[
            ['Batman', 'Batman', 'Gotham', 'WANTCAT', 'CATCATCAT', 50],
            ['Superman', 'Superman', 'Metropolis', 'CATDAD', 'DADCAT', 50],
        ],
        columns=[
            'seq-name', 'kmer', 'region', 'fwd-primer', 'rev-primer',
            'kmer-length',
        ],
        index=pd.Index(['Batman', 'Superman'], name='db-seq'),
    )
    filepath = os.path.join(self.base_dir, 'kmer-map.tsv')
    # Named ``kmer_map`` rather than ``format`` so the builtin is not shadowed.
    kmer_map = KmerMapFormat(filepath, mode='r')

    test = t._3(kmer_map)

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(test, dd.DataFrame)
    pdt.assert_frame_equal(known, test.compute())
def _4(obj: pd.DataFrame) -> KmerMapFormat:
    """Write ``obj`` as tab-separated text into a new ``KmerMapFormat``.

    A fresh ``KmerMapFormat`` is created, the frame is written with
    ``DataFrame.to_csv`` (tab separator) to the path given by ``str()`` of
    that format object, and the format object is returned.
    """
    result = KmerMapFormat()
    destination = str(result)
    obj.to_csv(destination, sep='\t')
    return result