def test_init(self, gene_ontology_data, gene_ontology):
    true_data = gene_ontology_data.dropna()
    true_all_genes = true_data['Ensembl Gene ID'].unique()
    true_ontology = defaultdict(dict)
    for go, df in true_data.groupby('GO Term Accession'):
        true_ontology[go]['genes'] = set(df['Ensembl Gene ID'])
        true_ontology[go]['name'] = df['GO Term Name'].values[0]
        true_ontology[go]['domain'] = df['GO domain'].values[0]
        true_ontology[go]['n_genes'] = len(true_ontology[go]['genes'])

    pdt.assert_frame_equal(true_data, gene_ontology.data)
    pdt.assert_array_equal(sorted(true_all_genes),
                           sorted(gene_ontology.all_genes))
    pdt.assert_contains_all(true_ontology.keys(), gene_ontology.ontology)
    pdt.assert_contains_all(gene_ontology.ontology.keys(), true_ontology)

    for go, true_attributes in true_ontology.items():
        test_attributes = gene_ontology.ontology[go]
        true_genes = sorted(true_attributes['genes'])
        test_genes = sorted(test_attributes['genes'])
        pdt.assert_array_equal(true_genes, test_genes)
        pdt.assert_equal(true_attributes['name'], test_attributes['name'])
        pdt.assert_equal(true_attributes['domain'],
                         test_attributes['domain'])
        pdt.assert_equal(true_attributes['n_genes'],
                         test_attributes['n_genes'])
def test_init(self, gene_ontology_data, gene_ontology):
    true_data = gene_ontology_data.dropna()
    true_all_genes = true_data['Ensembl Gene ID'].unique()
    true_ontology = defaultdict(dict)
    for go, df in true_data.groupby('GO Term Accession'):
        true_ontology[go]['genes'] = set(df['Ensembl Gene ID'])
        true_ontology[go]['name'] = df['GO Term Name'].values[0]
        true_ontology[go]['domain'] = df['GO domain'].values[0]
        true_ontology[go]['n_genes'] = len(true_ontology[go]['genes'])

    pdt.assert_frame_equal(true_data, gene_ontology.data)
    pdt.assert_numpy_array_equal(sorted(true_all_genes),
                                 sorted(gene_ontology.all_genes))
    pdt.assert_contains_all(true_ontology.keys(), gene_ontology.ontology)
    pdt.assert_contains_all(gene_ontology.ontology.keys(), true_ontology)

    for go, true_attributes in true_ontology.items():
        test_attributes = gene_ontology.ontology[go]
        true_genes = sorted(true_attributes['genes'])
        test_genes = sorted(test_attributes['genes'])
        pdt.assert_numpy_array_equal(true_genes, test_genes)
        pdt.assert_equal(true_attributes['name'], test_attributes['name'])
        pdt.assert_equal(true_attributes['domain'],
                         test_attributes['domain'])
        pdt.assert_equal(true_attributes['n_genes'],
                         test_attributes['n_genes'])
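# Note on the idiom above: the two reciprocal assert_contains_all calls
# together assert that true_ontology and gene_ontology.ontology have exactly
# the same key set. A minimal, self-contained sketch of the same pattern,
# using small hypothetical dicts purely for illustration (pdt here is the
# legacy pandas.util.testing module these snippets use):
import pandas.util.testing as pdt

left = {'GO:0000001': 'a', 'GO:0000002': 'b'}
right = {'GO:0000002': 'y', 'GO:0000001': 'x'}
pdt.assert_contains_all(left.keys(), right)   # every key of left is in right
pdt.assert_contains_all(right.keys(), left)   # every key of right is in left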
def test_reading_all_sheets_with_blank(self, read_ext):
    # Test reading all sheet names by setting sheet_name to None,
    # in the case where some sheets are blank.
    # Issue #11711
    basename = 'blank_with_header'
    dfs = pd.read_excel(basename + read_ext, sheet_name=None)
    expected_keys = ['Sheet1', 'Sheet2', 'Sheet3']
    tm.assert_contains_all(expected_keys, dfs.keys())
def test_reading_all_sheets(self, read_ext):
    # Test reading all sheet names by setting sheet_name to None,
    # and ensure a dict is returned.
    # See PR #9450
    basename = 'test_multisheet'
    dfs = pd.read_excel(basename + read_ext, sheet_name=None)
    # ensure this is not alphabetical to test order preservation
    expected_keys = ['Charlie', 'Alpha', 'Beta']
    tm.assert_contains_all(expected_keys, dfs.keys())

    # Issue #9930: ensure sheet order is preserved
    assert expected_keys == list(dfs.keys())
def test_reading_multiple_specific_sheets(self, read_ext):
    # Test reading specific sheet names by specifying a mixed list
    # of integers and strings, and confirm that duplicated sheet
    # references (positions/names) are removed properly.
    # Ensure a dict is returned.
    # See PR #9450
    basename = 'test_multisheet'
    # Explicitly request duplicates; only the set should be returned.
    expected_keys = [2, 'Charlie', 'Charlie']
    dfs = pd.read_excel(basename + read_ext, sheet_name=expected_keys)
    expected_keys = list(set(expected_keys))
    tm.assert_contains_all(expected_keys, dfs.keys())
    assert len(expected_keys) == len(dfs.keys())
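# For reference, assert_contains_all is a small membership helper: it checks
# that every element of `iterable` is `in` `dic`. The sketch below follows
# the shape of the legacy pandas.util.testing implementation, reproduced from
# memory, so treat it as an approximation rather than the exact source:
def assert_contains_all(iterable, dic):
    for k in iterable:
        assert k in dic, "Did not contain item: %r" % k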
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
    # GH 4343
    tm.skip_if_no_package('scipy')

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(3, dtype=dtype)
    # GH 16179
    arr[0, 1] = dtype(2)

    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in SciPy, so we
        # can just skip testing it thoroughly
        return

    sdf = pd.SparseDataFrame(spm, index=index, columns=columns,
                             default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast everything to something
    # generic and handle the exceptions afterwards
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    was_upcast = ((fill_value is None or is_float(fill_value)) and
                  not is_object_dtype(dtype) and
                  not is_float_dtype(dtype))
    res_dtype = (bool if is_bool_dtype(dtype) else
                 float if was_upcast else
                 dtype)
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype

    # However, adding a str column results in an upcast to object
    sdf['strings'] = np.arange(len(sdf)).astype(str)
    assert sdf.to_coo().dtype == np.object_
def test_from_to_scipy_object(spmatrix, fill_value):
    # GH 4343
    dtype = object
    columns = list('cd')
    index = list('ab')

    if (spmatrix is scipy.sparse.dok_matrix and
            LooseVersion(scipy.__version__) >= LooseVersion('0.19.0')):
        pytest.skip("dok_matrix from object does not work in SciPy >= 0.19")

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)

    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in SciPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm, index=index, columns=columns,
                          default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast everything to something
    # generic and handle the exceptions afterwards
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    with catch_warnings(record=True):
        assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    res_dtype = object
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype
def test_from_to_scipy_object(spmatrix, fill_value):
    # GH 4343
    dtype = object
    columns = list('cd')
    index = list('ab')

    if (spmatrix is scipy.sparse.dok_matrix and
            LooseVersion(scipy.__version__) >= LooseVersion('0.19.0')):
        pytest.skip("dok_matrix from object does not work in SciPy >= 0.19")

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)

    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in SciPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm, index=index, columns=columns,
                          default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast everything to something
    # generic and handle the exceptions afterwards
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(SparseDtype(object, fill_value))
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    res_dtype = object
    tm.assert_contains_all(sdf.dtypes.apply(lambda dtype: dtype.subtype),
                           {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype
def test_from_to_scipy_object(spmatrix, fill_value):
    # GH 4343
    dtype = object
    columns = list('cd')
    index = list('ab')
    tm.skip_if_no_package('scipy', max_version='0.19.0')

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)

    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in SciPy, so we
        # can just skip testing it thoroughly
        return

    sdf = pd.SparseDataFrame(spm, index=index, columns=columns,
                             default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast everything to something
    # generic and handle the exceptions afterwards
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    tm.assert_equal(dict(sdf.to_coo().todok()), dict(spm.todok()))

    # Ensure dtype is preserved if possible
    res_dtype = object
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    tm.assert_equal(sdf.to_coo().dtype, res_dtype)
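# The dict(m.todok()) comparison used in the scipy round-trip tests above is
# a format-insensitive way to compare sparse matrices: DOK maps (row, col)
# coordinates to the nonzero values only, so two matrices with the same
# nonzeros compare equal regardless of storage format. A minimal sketch:
import numpy as np
import scipy.sparse

csr = scipy.sparse.csr_matrix(np.eye(2))
coo = scipy.sparse.coo_matrix(np.eye(2))
assert dict(csr.todok()) == dict(coo.todok())  # same nonzeros, formats differ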
def test_change_phenotype_col(self, phenotype_order, phenotype_to_color,
                              phenotype_to_marker):
    from flotilla.data_model.metadata import MetaData

    metadata = self.metadata.copy()
    metadata['phenotype2'] = np.random.choice(list('QXYZ'), size=self.n)
    test_metadata = MetaData(metadata, phenotype_order, phenotype_to_color,
                             phenotype_to_marker, phenotype_col='phenotype')
    test_metadata.phenotype_col = 'phenotype2'

    pdt.assert_array_equal(test_metadata.unique_phenotypes,
                           metadata.phenotype2.unique())
    pdt.assert_contains_all(metadata.phenotype2.unique(),
                            test_metadata.phenotype_to_color)
    pdt.assert_contains_all(metadata.phenotype2.unique(),
                            test_metadata.phenotype_to_marker)
    pdt.assert_array_equal(test_metadata.phenotype_order,
                           list(sorted(metadata.phenotype2.unique())))
def test_change_phenotype_col(self, phenotype_order, phenotype_to_color,
                              phenotype_to_marker):
    from flotilla.data_model.metadata import MetaData

    metadata = self.metadata.copy()
    metadata['phenotype2'] = np.random.choice(list('QXYZ'), size=self.n)
    test_metadata = MetaData(metadata, phenotype_order, phenotype_to_color,
                             phenotype_to_marker, phenotype_col='phenotype')
    test_metadata.phenotype_col = 'phenotype2'

    pdt.assert_numpy_array_equal(test_metadata.unique_phenotypes,
                                 metadata.phenotype2.unique())
    pdt.assert_contains_all(metadata.phenotype2.unique(),
                            test_metadata.phenotype_to_color)
    pdt.assert_contains_all(metadata.phenotype2.unique(),
                            test_metadata.phenotype_to_marker)
    pdt.assert_numpy_array_equal(test_metadata.phenotype_order,
                                 list(sorted(metadata.phenotype2.unique())))
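# The assertions above amount to: after switching phenotype_col, every value
# in the new column must have an entry in both lookup dicts. The same check
# in isolation, with hypothetical phenotypes and colors for illustration:
import pandas.util.testing as pdt

phenotypes = ['Q', 'X', 'Y', 'Z']
phenotype_to_color = {'Q': 'red', 'X': 'blue', 'Y': 'green', 'Z': 'black'}
pdt.assert_contains_all(phenotypes, phenotype_to_color)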
def test_constructor(self):
    # regular instance creation
    tm.assert_contains_all(self.strIndex, self.strIndex)
    tm.assert_contains_all(self.dateIndex, self.dateIndex)

    # casting
    arr = np.array(self.strIndex)
    index = arr.view(Index)
    tm.assert_contains_all(arr, index)
    self.assert_(np.array_equal(self.strIndex, index))
def test_add(self):
    firstCat = self.strIndex + self.dateIndex
    secondCat = self.strIndex + self.strIndex

    self.assert_(tm.equalContents(np.append(self.strIndex, self.dateIndex),
                                  firstCat))
    self.assert_(tm.equalContents(secondCat, self.strIndex))
    tm.assert_contains_all(self.strIndex, firstCat.indexMap)
    tm.assert_contains_all(self.strIndex, secondCat.indexMap)
    tm.assert_contains_all(self.dateIndex, firstCat.indexMap)

    # this is valid too
    shifted = self.dateIndex + timedelta(1)
def test_constructor(self):
    # regular instance creation
    common.assert_contains_all(self.strIndex, self.strIndex)
    common.assert_contains_all(self.dateIndex, self.dateIndex)

    # casting
    arr = np.array(self.strIndex)
    index = arr.view(Index)
    common.assert_contains_all(arr, index)
    self.assert_(np.array_equal(self.strIndex, index))

    # corner case
    self.assertRaises(Exception, Index, 0)
def test_add(self):
    firstCat = self.strIndex + self.dateIndex
    secondCat = self.strIndex + self.strIndex

    self.assert_(common.equalContents(np.append(self.strIndex,
                                                self.dateIndex), firstCat))
    self.assert_(common.equalContents(secondCat, self.strIndex))
    common.assert_contains_all(self.strIndex, firstCat.indexMap)
    common.assert_contains_all(self.strIndex, secondCat.indexMap)
    common.assert_contains_all(self.dateIndex, firstCat.indexMap)

    # this is valid too
    shifted = self.dateIndex + timedelta(1)
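# In the older snippets above, Index.indexMap was a label -> position mapping
# on early pandas Index objects (removed long ago), so assert_contains_all
# against it checked that every label appears in the concatenated index. A
# rough modern equivalent, sketched with plain membership tests:
import pandas as pd

first = pd.Index(['a', 'b'])
second = pd.Index(['b', 'c'])
combined = first.append(second)
assert all(label in combined for label in first)
assert all(label in combined for label in second)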
def test_contains(self):
    tm.assert_contains_all(self.ts.index, self.ts)
def test_contains(self):
    common.assert_contains_all(self.ts.index, self.ts)
def test_contains(self, datetime_series):
    tm.assert_contains_all(datetime_series.index, datetime_series)
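# These containment tests rely on pandas Series membership semantics:
# `x in series` checks the index labels, not the values, which is why
# assert_contains_all(series.index, series) holds. A quick illustration:
import pandas as pd

s = pd.Series([10, 20], index=['a', 'b'])
assert 'a' in s       # index labels count as "contained"
assert 10 not in s    # values do not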