Example #1
0
    def test_init(self, gene_ontology_data, gene_ontology):
        """GeneOntology construction: the stored data table, the gene
        universe, and the per-term ontology mapping must all match a
        reference built directly from the raw annotation table.
        """
        expected_data = gene_ontology_data.dropna()
        expected_genes = expected_data['Ensembl Gene ID'].unique()

        # Reference mapping: GO accession -> genes / name / domain / n_genes
        expected_ontology = {}
        for accession, group in expected_data.groupby('GO Term Accession'):
            genes = set(group['Ensembl Gene ID'])
            expected_ontology[accession] = {
                'genes': genes,
                'name': group['GO Term Name'].values[0],
                'domain': group['GO domain'].values[0],
                'n_genes': len(genes),
            }

        pdt.assert_frame_equal(expected_data, gene_ontology.data)
        pdt.assert_array_equal(sorted(expected_genes),
                               sorted(gene_ontology.all_genes))

        # Both sides must carry exactly the same GO accessions
        pdt.assert_contains_all(expected_ontology.keys(),
                                gene_ontology.ontology)
        pdt.assert_contains_all(gene_ontology.ontology.keys(),
                                expected_ontology)

        for accession, expected in expected_ontology.items():
            actual = gene_ontology.ontology[accession]
            pdt.assert_array_equal(sorted(expected['genes']),
                                   sorted(actual['genes']))
            pdt.assert_equal(expected['name'], actual['name'])
            pdt.assert_equal(expected['domain'], actual['domain'])
            pdt.assert_equal(expected['n_genes'], actual['n_genes'])
    def test_init(self, gene_ontology_data, gene_ontology):
        """Verify GeneOntology construction against an independently built
        reference: the data table, the set of all genes, and the per-GO-term
        ontology mapping (genes, name, domain, gene count).
        """
        true_data = gene_ontology_data.dropna()
        true_all_genes = true_data['Ensembl Gene ID'].unique()
        true_ontology = defaultdict(dict)

        # Reference mapping: GO accession -> genes / name / domain / n_genes
        for go, df in true_data.groupby('GO Term Accession'):
            true_ontology[go]['genes'] = set(df['Ensembl Gene ID'])
            true_ontology[go]['name'] = df['GO Term Name'].values[0]
            true_ontology[go]['domain'] = df['GO domain'].values[0]
            true_ontology[go]['n_genes'] = len(true_ontology[go]['genes'])

        pdt.assert_frame_equal(true_data, gene_ontology.data)
        pdt.assert_numpy_array_equal(sorted(true_all_genes),
                                     sorted(gene_ontology.all_genes))

        pdt.assert_contains_all(true_ontology.keys(), gene_ontology.ontology)
        pdt.assert_contains_all(gene_ontology.ontology.keys(), true_ontology)

        for go, true_attributes in true_ontology.items():
            test_attributes = gene_ontology.ontology[go]
            true_genes = sorted(true_attributes['genes'])
            test_genes = sorted(test_attributes['genes'])
            pdt.assert_numpy_array_equal(true_genes, test_genes)
            pdt.assert_equal(true_attributes['name'], test_attributes['name'])
            pdt.assert_equal(true_attributes['domain'],
                             test_attributes['domain'])
            pdt.assert_equal(true_attributes['n_genes'],
                             test_attributes['n_genes'])
Example #3
0
 def test_reading_all_sheets_with_blank(self, read_ext):
     """With ``sheet_name=None`` every sheet is returned, including the
     blank ones (issue #11711)."""
     path = 'blank_with_header' + read_ext
     sheets = pd.read_excel(path, sheet_name=None)
     tm.assert_contains_all(['Sheet1', 'Sheet2', 'Sheet3'], sheets.keys())
Example #4
0
 def test_reading_all_sheets_with_blank(self, read_ext):
     """Reading a workbook that contains blank sheets with
     ``sheet_name=None`` still yields one entry per sheet (issue #11711)."""
     workbook = 'blank_with_header' + read_ext
     result = pd.read_excel(workbook, sheet_name=None)
     expected = ['Sheet1', 'Sheet2', 'Sheet3']
     tm.assert_contains_all(expected, result.keys())
Example #5
0
 def test_reading_all_sheets(self, read_ext):
     """``sheet_name=None`` returns a dict of all sheets and preserves the
     workbook's sheet order (PR #9450, issue #9930)."""
     path = 'test_multisheet' + read_ext
     sheets = pd.read_excel(path, sheet_name=None)
     # Deliberately non-alphabetical so order preservation is observable
     expected = ['Charlie', 'Alpha', 'Beta']
     tm.assert_contains_all(expected, sheets.keys())
     # Order must follow the workbook, not be sorted
     assert list(sheets.keys()) == expected
Example #6
0
 def test_reading_all_sheets(self, read_ext):
     """All sheets come back as a dict when ``sheet_name=None``; the dict
     iterates in workbook order (PR #9450, issue #9930)."""
     result = pd.read_excel('test_multisheet' + read_ext, sheet_name=None)
     # Non-alphabetical names make any accidental re-sorting detectable
     sheet_names = ['Charlie', 'Alpha', 'Beta']
     tm.assert_contains_all(sheet_names, result.keys())
     assert sheet_names == list(result.keys())
Example #7
0
 def test_reading_multiple_specific_sheets(self, read_ext):
     """A mixed list of sheet positions and names containing duplicates is
     de-duplicated; the returned dict has one entry per unique reference
     (PR #9450)."""
     path = 'test_multisheet' + read_ext
     # Request duplicates on purpose; only the unique set should come back
     requested = [2, 'Charlie', 'Charlie']
     result = pd.read_excel(path, sheet_name=requested)
     unique_refs = list(set(requested))
     tm.assert_contains_all(unique_refs, result.keys())
     assert len(result.keys()) == len(unique_refs)
Example #8
0
 def test_reading_multiple_specific_sheets(self, read_ext):
     """Duplicate sheet references (by position or name) in the
     ``sheet_name`` list are dropped before reading (PR #9450)."""
     basename = 'test_multisheet'
     # Deliberately repeat a sheet name in the request
     refs = [2, 'Charlie', 'Charlie']
     dfs = pd.read_excel(basename + read_ext, sheet_name=refs)
     deduped = list(set(refs))
     tm.assert_contains_all(deduped, dfs.keys())
     assert len(deduped) == len(dfs.keys())
Example #9
0
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
    """Round-trip a scipy sparse matrix through ``pd.SparseDataFrame``.

    Parametrized (via fixtures) over sparse-matrix classes, frame labels,
    fill values, and dtypes.  Checks frame equality against an expected
    construction, ``to_coo`` round-tripping, and dtype preservation /
    upcasting rules.
    """
    # GH 4343
    tm.skip_if_no_package('scipy')

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(3, dtype=dtype)
    # GH 16179
    arr[0, 1] = dtype(2)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = pd.SparseDataFrame(spm,
                             index=index,
                             columns=columns,
                             default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    # A missing (None) or float fill value upcasts non-float, non-object
    # dtypes to float; bool dtypes are expected to remain bool
    was_upcast = ((fill_value is None or is_float(fill_value))
                  and not is_object_dtype(dtype) and not is_float_dtype(dtype))
    res_dtype = (bool
                 if is_bool_dtype(dtype) else float if was_upcast else dtype)
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype

    # However, adding a str column results in an upcast to object
    sdf['strings'] = np.arange(len(sdf)).astype(str)
    assert sdf.to_coo().dtype == np.object_
Example #10
0
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
    """Round-trip a scipy sparse matrix through ``pd.SparseDataFrame``.

    Parametrized (via fixtures) over sparse-matrix classes, frame labels,
    fill values, and dtypes.  Checks frame equality against an expected
    construction, ``to_coo`` round-tripping, and dtype preservation /
    upcasting rules.
    """
    # GH 4343
    tm.skip_if_no_package('scipy')

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(3, dtype=dtype)
    # GH 16179
    arr[0, 1] = dtype(2)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = pd.SparseDataFrame(spm, index=index, columns=columns,
                             default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    # A missing (None) or float fill value upcasts non-float, non-object
    # dtypes to float; bool dtypes are expected to remain bool
    was_upcast = ((fill_value is None or is_float(fill_value)) and
                  not is_object_dtype(dtype) and
                  not is_float_dtype(dtype))
    res_dtype = (bool if is_bool_dtype(dtype) else
                 float if was_upcast else
                 dtype)
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype

    # However, adding a str column results in an upcast to object
    sdf['strings'] = np.arange(len(sdf)).astype(str)
    assert sdf.to_coo().dtype == np.object_
Example #11
0
def test_from_to_scipy_object(spmatrix, fill_value):
    """Round-trip an object-dtype scipy sparse matrix through
    ``SparseDataFrame``; the object dtype must be preserved end to end.
    """
    # GH 4343
    dtype = object
    columns = list('cd')
    index = list('ab')

    # dok_matrix cannot be built from object arrays on SciPy >= 0.19
    if (spmatrix is scipy.sparse.dok_matrix
            and LooseVersion(scipy.__version__) >= LooseVersion('0.19.0')):
        pytest.skip("dok_matrix from object does not work in SciPy >= 0.19")

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm,
                          index=index,
                          columns=columns,
                          default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    # record=True swallows any warnings the COO conversion may raise
    with catch_warnings(record=True):
        assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    res_dtype = object
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype
def test_from_to_scipy_object(spmatrix, fill_value):
    """Round-trip an object-dtype scipy sparse matrix through
    ``SparseDataFrame``; the object dtype must be preserved end to end.
    """
    # GH 4343
    dtype = object
    columns = list('cd')
    index = list('ab')

    # dok_matrix cannot be built from object arrays on SciPy >= 0.19
    if (spmatrix is scipy.sparse.dok_matrix and LooseVersion(
            scipy.__version__) >= LooseVersion('0.19.0')):
        pytest.skip("dok_matrix from object does not work in SciPy >= 0.19")

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = SparseDataFrame(spm, index=index, columns=columns,
                          default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(SparseDtype(object, fill_value))
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    res_dtype = object
    # Columns hold SparseDtype objects; compare their underlying subtype
    tm.assert_contains_all(sdf.dtypes.apply(lambda dtype: dtype.subtype),
                           {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype
Example #13
0
def test_from_to_scipy_object(spmatrix, fill_value):
    """Round-trip an object-dtype scipy sparse matrix through
    ``pd.SparseDataFrame``; the object dtype must be preserved end to end.
    """
    # GH 4343
    dtype = object
    columns = list('cd')
    index = list('ab')
    # Skip on SciPy newer than 0.19.0 (or when scipy is absent)
    tm.skip_if_no_package('scipy', max_version='0.19.0')

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = pd.SparseDataFrame(spm,
                             index=index,
                             columns=columns,
                             default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    tm.assert_equal(dict(sdf.to_coo().todok()), dict(spm.todok()))

    # Ensure dtype is preserved if possible
    res_dtype = object
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    tm.assert_equal(sdf.to_coo().dtype, res_dtype)
Example #14
0
def test_from_to_scipy_object(spmatrix, fill_value):
    """Round-trip an object-dtype scipy sparse matrix through
    ``pd.SparseDataFrame``; the object dtype must be preserved end to end.
    """
    # GH 4343
    dtype = object
    columns = list('cd')
    index = list('ab')
    # Skip on SciPy newer than 0.19.0 (or when scipy is absent)
    tm.skip_if_no_package('scipy', max_version='0.19.0')

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = pd.SparseDataFrame(spm, index=index, columns=columns,
                             default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    tm.assert_equal(dict(sdf.to_coo().todok()), dict(spm.todok()))

    # Ensure dtype is preserved if possible
    res_dtype = object
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    tm.assert_equal(sdf.to_coo().dtype, res_dtype)
Example #15
0
    def test_change_phenotype_col(self, phenotype_order, phenotype_to_color,
                                  phenotype_to_marker):
        """Rebinding ``phenotype_col`` on a MetaData instance must re-derive
        the unique phenotypes, the color and marker maps, and the phenotype
        ordering from the newly selected column.
        """
        from flotilla.data_model.metadata import MetaData

        data = self.metadata.copy()
        # Second phenotype column with random values drawn from 'QXYZ'
        data['phenotype2'] = np.random.choice(list('QXYZ'), size=self.n)

        md = MetaData(data, phenotype_order, phenotype_to_color,
                      phenotype_to_marker, phenotype_col='phenotype')
        md.phenotype_col = 'phenotype2'

        new_phenotypes = data.phenotype2.unique()
        pdt.assert_array_equal(md.unique_phenotypes, new_phenotypes)
        pdt.assert_contains_all(new_phenotypes, md.phenotype_to_color)
        pdt.assert_contains_all(new_phenotypes, md.phenotype_to_marker)
        pdt.assert_array_equal(md.phenotype_order,
                               list(sorted(new_phenotypes)))
Example #16
0
    def test_change_phenotype_col(self, phenotype_order, phenotype_to_color,
                                  phenotype_to_marker):
        """Setting ``phenotype_col`` on an existing MetaData must refresh
        the unique phenotypes, the color/marker maps, and the phenotype
        ordering from the new column.
        """
        from flotilla.data_model.metadata import MetaData

        metadata = self.metadata.copy()
        # Second phenotype column with random values drawn from 'QXYZ'
        metadata['phenotype2'] = np.random.choice(list('QXYZ'), size=self.n)

        test_metadata = MetaData(metadata, phenotype_order,
                                 phenotype_to_color,
                                 phenotype_to_marker,
                                 phenotype_col='phenotype')
        test_metadata.phenotype_col = 'phenotype2'

        pdt.assert_numpy_array_equal(test_metadata.unique_phenotypes,
                                     metadata.phenotype2.unique())
        pdt.assert_contains_all(metadata.phenotype2.unique(),
                                test_metadata.phenotype_to_color)
        pdt.assert_contains_all(metadata.phenotype2.unique(),
                                test_metadata.phenotype_to_marker)
        pdt.assert_numpy_array_equal(
            test_metadata.phenotype_order,
            list(sorted(metadata.phenotype2.unique())))
Example #17
0
    def test_constructor(self):
        """Plain construction and ndarray-view casting keep every element."""
        # An index always contains all of its own labels
        tm.assert_contains_all(self.strIndex, self.strIndex)
        tm.assert_contains_all(self.dateIndex, self.dateIndex)

        # Viewing an ndarray as Index preserves the contents
        values = np.array(self.strIndex)
        casted = values.view(Index)
        tm.assert_contains_all(values, casted)
        self.assert_(np.array_equal(self.strIndex, casted))
Example #18
0
    def test_constructor(self):
        """Index instances contain their own labels; casting an ndarray via
        ``view(Index)`` loses nothing."""
        for idx in (self.strIndex, self.dateIndex):
            tm.assert_contains_all(idx, idx)

        # casting an ndarray view back to Index
        raw = np.array(self.strIndex)
        viewed = raw.view(Index)
        tm.assert_contains_all(raw, viewed)
        self.assert_(np.array_equal(self.strIndex, viewed))
Example #19
0
    def test_add(self):
        """``+`` between indexes concatenates their contents; both
        operands' labels end up in the result's indexMap."""
        combined = self.strIndex + self.dateIndex
        doubled = self.strIndex + self.strIndex

        self.assert_(tm.equalContents(
            np.append(self.strIndex, self.dateIndex), combined))
        self.assert_(tm.equalContents(doubled, self.strIndex))
        tm.assert_contains_all(self.strIndex, combined.indexMap)
        tm.assert_contains_all(self.strIndex, doubled.indexMap)
        tm.assert_contains_all(self.dateIndex, combined.indexMap)

        # adding a timedelta is also valid (no exception expected)
        shifted = self.dateIndex + timedelta(1)
Example #20
0
    def test_constructor(self):
        """Construction keeps all elements; ndarray views cast cleanly;
        scalar input is rejected."""
        # regular instance creation
        common.assert_contains_all(self.strIndex, self.strIndex)
        common.assert_contains_all(self.dateIndex, self.dateIndex)

        # casting an ndarray view to Index keeps the contents
        raw = np.array(self.strIndex)
        viewed = raw.view(Index)
        common.assert_contains_all(raw, viewed)
        self.assert_(np.array_equal(self.strIndex, viewed))

        # corner case: a scalar is not a valid Index argument
        self.assertRaises(Exception, Index, 0)
Example #21
0
    def test_add(self):
        """Index ``+`` concatenates; both operands' labels appear in the
        result's indexMap."""
        combined = self.strIndex + self.dateIndex
        doubled = self.strIndex + self.strIndex

        appended = np.append(self.strIndex, self.dateIndex)
        self.assert_(common.equalContents(appended, combined))
        self.assert_(common.equalContents(doubled, self.strIndex))
        common.assert_contains_all(self.strIndex, combined.indexMap)
        common.assert_contains_all(self.strIndex, doubled.indexMap)
        common.assert_contains_all(self.dateIndex, combined.indexMap)

        # adding a timedelta is also valid (no exception expected)
        shifted = self.dateIndex + timedelta(1)
Example #22
0
 def test_contains(self):
     """Every label of the series' index supports ``in`` on the series."""
     tm.assert_contains_all(self.ts.index, self.ts)
Example #23
0
 def test_contains(self):
     """Every label of the series' index supports ``in`` on the series."""
     tm.assert_contains_all(self.ts.index, self.ts)
Example #24
0
 def test_contains(self):
     """Every label of the series' index supports ``in`` on the series."""
     common.assert_contains_all(self.ts.index, self.ts)
Example #25
0
 def test_contains(self):
     """Every label of the series' index supports ``in`` on the series."""
     common.assert_contains_all(self.ts.index, self.ts)
Example #26
0
 def test_contains(self, datetime_series):
     """Each index label of the ``datetime_series`` fixture is contained
     in the series itself."""
     tm.assert_contains_all(datetime_series.index, datetime_series)