def test_invalid_url(self):
    """Reading HTML from an unresolvable host must fail in a known way.

    The call is expected to raise ``URLError``; a ``ValueError`` is also
    accepted, but only when its message is exactly ``'No tables found'``.
    """
    bogus_url = 'http://www.a23950sdfa908sd.com'
    try:
        with tm.assertRaises(URLError):
            self.read_html(bogus_url, match='.*Water.*')
    except ValueError as err:
        # Alternative accepted outcome: the reader reports no tables.
        tm.assert_equal(str(err), 'No tables found')
def test_downcast_limits(self):
    """Check ``pd.to_numeric`` downcasting at the limits of each int dtype.

    Bug: #14401.
    """
    # Check to make sure numpy is new enough to run this test.
    if _np_version_under1p9:
        raise nose.SkipTest("Numpy version is under 1.9")

    signed = 'integer'
    unsigned = 'unsigned'

    def low(np_type):
        return iinfo(np_type).min

    def high(np_type):
        return iinfo(np_type).max

    signed_steps = (('int16', np.int8), ('int32', np.int16),
                    ('int64', np.int32))
    unsigned_steps = (('uint16', np.uint8), ('uint32', np.uint16),
                      ('uint64', np.uint32))

    cases = []
    # Values that exactly span a dtype stay in that dtype.
    for name in ('int8', 'int16', 'int32', 'int64'):
        np_type = getattr(np, name)
        cases.append((name, signed, [low(np_type), high(np_type)]))
    for name in ('uint8', 'uint16', 'uint32', 'uint64'):
        np_type = getattr(np, name)
        cases.append((name, unsigned, [low(np_type), high(np_type)]))
    # One above the narrower maximum needs the next wider signed dtype.
    for wider, narrower in signed_steps:
        cases.append((wider, signed, [low(narrower), high(narrower) + 1]))
    # One below the narrower minimum needs the next wider signed dtype.
    for wider, narrower in signed_steps:
        cases.append((wider, signed,
                      [low(narrower) - 1, high(getattr(np, wider))]))
    # Same overflow check for the unsigned dtypes.
    for wider, narrower in unsigned_steps:
        cases.append((wider, unsigned, [low(narrower), high(narrower) + 1]))

    for dtype, downcast, min_max in cases:
        series = pd.to_numeric(pd.Series(min_max), downcast=downcast)
        tm.assert_equal(series.dtype, dtype)
def test_values_consistent(array, expected_type, dtype): l_values = pd.Series(array)._values r_values = pd.Index(array)._values assert type(l_values) is expected_type assert type(l_values) is type(r_values) tm.assert_equal(l_values, r_values)
def test_aggregate_normal(resample_method):
    """Check TimeGrouper's aggregation is identical as normal groupby.

    The same random data is grouped once by an integer key and once by a
    daily ``TimeGrouper`` over matching dates; ``resample_method`` is then
    applied to both groupings and the results are compared.
    """
    if resample_method == 'ohlc':
        pytest.xfail(reason='DataError: No numeric types to aggregate')

    value_columns = ['A', 'B', 'C', 'D']
    data = np.random.randn(20, 4)

    normal_df = DataFrame(data, columns=value_columns)
    normal_df['key'] = [1, 2, 3, 4, 5] * 4

    dt_df = DataFrame(data, columns=value_columns)
    dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2),
                    datetime(2013, 1, 3), datetime(2013, 1, 4),
                    datetime(2013, 1, 5)] * 4

    normal_grouped = normal_df.groupby('key')
    dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))

    expected = getattr(normal_grouped, resample_method)()
    dt_result = getattr(dt_grouped, resample_method)()
    # Re-label the integer-keyed result so both frames share a date index.
    expected.index = date_range(start='2013-01-01', freq='D',
                                periods=5, name='key')
    tm.assert_equal(expected, dt_result)

    # NOTE: when a TimeGrouper is involved, 'nth' doesn't work yet, so it
    # is not exercised here.
def test_replace_period(self):
    """``DataFrame.replace`` with a nested dict maps file names to Periods."""

    def monthly(year, month):
        return pd.Period(year=year, month=month, freq='M')

    d = {
        'fname': {
            'out_augmented_AUG_2011.json': monthly(2011, 8),
            'out_augmented_JAN_2011.json': monthly(2011, 1),
            'out_augmented_MAY_2012.json': monthly(2012, 5),
            'out_augmented_SUBSIDY_WEEK.json': monthly(2011, 4),
            'out_augmented_AUG_2012.json': monthly(2012, 8),
            'out_augmented_MAY_2011.json': monthly(2011, 5),
            'out_augmented_SEP_2013.json': monthly(2013, 9),
        }
    }
    file_names = [
        'out_augmented_AUG_2012.json',
        'out_augmented_SEP_2013.json',
        'out_augmented_SUBSIDY_WEEK.json',
        'out_augmented_MAY_2012.json',
        'out_augmented_MAY_2011.json',
        'out_augmented_AUG_2011.json',
        'out_augmented_JAN_2011.json',
    ]
    df = pd.DataFrame(file_names, columns=['fname'])

    # Sanity check: the frame and the mapping cover the same file names.
    tm.assert_equal(set(df.fname.values), set(d['fname'].keys()))

    periods = [d['fname'][name] for name in df.fname.values]
    expected = DataFrame({'fname': periods})
    result = df.replace(d)
    assert_frame_equal(result, expected)
def test_categorical_warnings_and_errors(self):
    """Check Stata export of problematic categorical labels.

    * Labels that are too long must make ``to_stata`` raise ``ValueError``.
    * Non-string (mixed) labels must produce exactly one warning.

    Both writes happen while the temporary path from ``tm.ensure_clean``
    is still active, so the file created by the second write is removed
    together with the path instead of being left behind.
    """

    def as_categoricals(frame):
        # Convert every column to 'category' and reassemble the frame.
        return pd.concat([frame[col].astype('category') for col in frame],
                         axis=1)

    # Error for labels too long
    too_long = as_categoricals(pd.DataFrame.from_records(
        [['a' * 10000], ['b' * 10000], ['c' * 10000], ['d' * 10000]],
        columns=['Too_long']))

    # Warning for non-string labels
    mixed = as_categoricals(pd.DataFrame.from_records(
        [['a'], ['b'], ['c'], ['d'], [1]],
        columns=['Too_long']))

    with tm.ensure_clean() as path:
        tm.assertRaises(ValueError, too_long.to_stata, path)

        with warnings.catch_warnings(record=True) as w:
            mixed.to_stata(path)
            # should get a warning for mixed content
            tm.assert_equal(len(w), 1)
def test_shift(fill_value): ct = pd.Categorical(['a', 'b', 'c', 'd'], categories=['a', 'b', 'c', 'd'], ordered=False) expected = pd.Categorical([None, 'a', 'b', 'c'], categories=['a', 'b', 'c', 'd'], ordered=False) res = ct.shift(1, fill_value=fill_value) assert_equal(res, expected)
def test_plot_violins(pca, kwargs, df_norm):
    """The violin figure must hold a full grid of axes, four per row.

    The grid has at least one row and as many rows as are needed to fit
    every label in the union of the leading ``n_vectors`` magnitudes and
    the top features.  All figures are closed even if an assertion fails.
    """
    from flotilla.visualize.decomposition import DecompositionViz

    # 'singles' is supplied explicitly below, so drop it from the kwargs.
    viz_kwargs = kwargs.copy()
    viz_kwargs.pop('singles')

    try:
        dv = DecompositionViz(pca.reduced_space, pca.components_,
                              pca.explained_variance_ratio_,
                              singles=df_norm, **viz_kwargs)
        dv.plot(plot_violins=True)

        ncols = 4
        top_features = pd.Index(dv.top_features)
        vector_labels = list(set(
            dv.magnitudes[:dv.n_vectors].index.union(top_features)))

        # Smallest row count (never below one) whose grid fits all labels.
        nrows = max(1, -(-len(vector_labels) // ncols))

        pdt.assert_equal(len(dv.fig_violins.axes), nrows * ncols)
        # TODO: also check per-axis collection counts against
        # dv.grouped.groups (this check was disabled in the original).
    finally:
        # Always release the figures so a failure does not leak them.
        plt.close('all')
def test__init_sample_thresholds(self, expression_data, expression_thresh,
                                 metadata_minimum_samples, pooled):
    """``BaseData`` must threshold data and split singles from pooled samples.

    The expected frames are rebuilt here with the same ``_threshold`` helper
    and compared against the attributes of the constructed object.
    """
    from flotilla.data_model.base import BaseData

    base_data = BaseData(expression_data, thresh=expression_thresh,
                         minimum_samples=metadata_minimum_samples,
                         pooled=pooled)

    data = expression_data.copy()
    if pooled is not None:
        pooled_samples = pooled.copy()
    else:
        pooled_samples = []
    single_samples = data.index[~data.index.isin(pooled_samples)]
    singles_df = data.ix[single_samples]

    needs_threshold = (expression_thresh > -np.inf or
                       metadata_minimum_samples > 0)
    if needs_threshold:
        if singles_df.empty:
            data = base_data._threshold(data)
        else:
            data = base_data._threshold(data, singles_df)

    # Re-split after thresholding so the pieces reflect the filtered data.
    singles_df = data.ix[single_samples]
    pooled_df = data.ix[pooled_samples]

    pdt.assert_frame_equal(base_data.data_original, expression_data)
    pdt.assert_frame_equal(base_data.data, data)
    pdt.assert_equal(base_data.thresh, expression_thresh)
    pdt.assert_equal(base_data.minimum_samples, metadata_minimum_samples)
    pdt.assert_frame_equal(base_data.pooled, pooled_df)
    pdt.assert_frame_equal(base_data.singles, singles_df)
def test_read_write_dta12(self):
    """Writing invalid Stata column names warns once and renames them."""
    values = [(1, 2, 3, 4, 5, 6)]
    original_columns = [
        "astringwithmorethan32characters_1",
        "astringwithmorethan32characters_2",
        "+",
        "-",
        "short",
        "delete",
    ]
    formatted_columns = [
        "astringwithmorethan32characters_",
        "_0astringwithmorethan32character",
        "_",
        "_1_",
        "_short",
        "_delete",
    ]
    original = DataFrame(values, columns=original_columns)
    formatted = DataFrame(values, columns=formatted_columns)
    formatted.index.name = "index"
    formatted = formatted.astype(np.int32)

    with tm.ensure_clean() as path:
        with warnings.catch_warnings(record=True) as caught:
            original.to_stata(path, None)
            # should get a warning for that format.
            tm.assert_equal(len(caught), 1)

        written_and_read_again = self.read_dta(path)

    tm.assert_frame_equal(written_and_read_again.set_index("index"),
                          formatted)
def test_ufunc_compat(self, holder): box = pd.Series if holder is pd.Series else pd.Index idx = holder(np.arange(5, dtype='int64')) result = np.sin(idx) expected = box(np.sin(np.arange(5, dtype='int64'))) tm.assert_equal(result, expected)
def test_index_column(self):
    """``read_gbq`` must name the index after the ``index_col`` argument."""
    index_name = "STRING_1"
    query = "SELECT 'a' as STRING_1, 'b' as STRING_2"

    result_frame = gbq.read_gbq(query, project_id=PROJECT_ID,
                                index_col=index_name)
    correct_frame = DataFrame({'STRING_1': ['a'], 'STRING_2': ['b']})
    correct_frame = correct_frame.set_index(index_name)

    tm.assert_equal(result_frame.index.name, correct_frame.index.name)
def test_read_write_dta12(self):
    """Writing invalid Stata column names warns once and renames them."""
    row = [(1, 2, 3, 4, 5, 6)]
    original = DataFrame(row, columns=[
        'astringwithmorethan32characters_1',
        'astringwithmorethan32characters_2',
        '+',
        '-',
        'short',
        'delete',
    ])
    # Column names as they are expected to come back from the file.
    formatted = DataFrame(row, columns=[
        'astringwithmorethan32characters_',
        '_0astringwithmorethan32character',
        '_',
        '_1_',
        '_short',
        '_delete',
    ])
    formatted.index.name = 'index'
    formatted = formatted.astype(np.int32)

    with tm.ensure_clean() as path:
        with warnings.catch_warnings(record=True) as recorded:
            original.to_stata(path, None)
            # should get a warning for that format.
            tm.assert_equal(len(recorded), 1)

        written_and_read_again = self.read_dta(path)

    roundtripped = written_and_read_again.set_index('index')
    tm.assert_frame_equal(roundtripped, formatted)
def assert_graph_items_equal(graph1, items1, graph2, items2):
    """Checks all relationships in graph1 exist in graph2, and vice versa"""
    from outrigger.index.adjacencies import DIRECTIONS

    def check_one_way(graph_a, items_a, graph_b, items_b):
        # For every item and direction, the sorted neighbours found in
        # graph_a must equal the sorted neighbours of the same item in
        # graph_b.
        for number_a, item in enumerate(items_a):
            for direction in DIRECTIONS:
                found = sorted(
                    items_a[i] for i in
                    graph_a.find(getattr(V(number_a), direction)))

                number_b = items_b.index(item)
                wanted = sorted(
                    items_b[i] for i in
                    graph_b.find(getattr(V(number_b), direction)))

                pdt.assert_equal(found, wanted)

    check_one_way(graph1, items1, graph2, items2)
    check_one_way(graph2, items2, graph1, items1)
def test_read_dta2(self):
    """Stata date/time columns in formats 114, 115 and 117 parse identically.

    Reading the three files together is expected to emit exactly one
    warning.
    """
    if LooseVersion(sys.version) < '2.7':
        raise nose.SkipTest('datetime interp under 2.6 is faulty')

    column_names = ['datetime_c', 'datetime_big_c', 'date', 'weekly_date',
                    'monthly_date', 'quarterly_date', 'half_yearly_date',
                    'yearly_date']
    recent_row = (
        datetime(2006, 11, 19, 23, 13, 20),
        1479596223000,
        datetime(2010, 1, 20),
        datetime(2010, 1, 8),
        datetime(2010, 1, 1),
        datetime(1974, 7, 1),
        datetime(2010, 1, 1),
        datetime(2010, 1, 1),
    )
    early_row = (
        datetime(1959, 12, 31, 20, 3, 20),
        -1479590,
        datetime(1953, 10, 2),
        datetime(1948, 6, 10),
        datetime(1955, 1, 1),
        datetime(1955, 7, 1),
        datetime(1955, 1, 1),
        datetime(2, 1, 1),
    )
    missing_row = (pd.NaT,) * 8

    expected = DataFrame.from_records(
        [recent_row, early_row, missing_row], columns=column_names)
    expected['yearly_date'] = expected['yearly_date'].astype('O')

    with warnings.catch_warnings(record=True) as w:
        parsed_114 = self.read_dta(self.dta2_114)
        parsed_115 = self.read_dta(self.dta2_115)
        parsed_117 = self.read_dta(self.dta2_117)
        # 113 is buggy due to limited date format support in Stata
        # parsed_113 = self.read_dta(self.dta2_113)

    # should get a warning for that format.
    tm.assert_equal(len(w), 1)

    # buggy test because of the NaT comparison on certain platforms
    # Format 113 test fails since it does not support tc and tC formats
    # tm.assert_frame_equal(parsed_113, expected)
    for parsed in (parsed_114, parsed_115, parsed_117):
        tm.assert_frame_equal(parsed, expected)
def test_numpy_errstate_is_default():
    """Importing ``pandas.compat.numpy`` must leave NumPy's errstate alone."""
    import numpy as np
    from pandas.compat import numpy  # noqa

    # The defaults since numpy 1.6.0
    default_errstate = dict(over="warn", divide="warn",
                            invalid="warn", under="ignore")

    # The errstate should be unchanged after that import.
    tm.assert_equal(np.geterr(), default_errstate)
def test_array_not_registered(registry_without_decimal):
    """``pd.array`` accepts the dtype class while 'decimal' is not registered."""
    # check we aren't on it
    assert registry.find('decimal') is None

    data = [decimal.Decimal(text) for text in ('1', '2')]
    expected = DecimalArray._from_sequence(data)
    result = pd.array(data, dtype=DecimalDtype)
    tm.assert_equal(result, expected)
def test_to_csv_with_mix_columns(self): # GH11637, incorrect output when a mix of integer and string column # names passed as columns parameter in to_csv df = DataFrame({0: ['a', 'b', 'c'], 1: ['aa', 'bb', 'cc']}) df['test'] = 'txt' assert_equal(df.to_csv(), df.to_csv(columns=[0, 1, 'test']))
def test_numpy_errstate_is_default():
    """NumPy's error state must be untouched by ``pandas.compat.numpy``."""
    import numpy as np
    from pandas.compat import numpy  # noqa

    # The defaults since numpy 1.6.0
    defaults = {
        'over': 'warn',
        'divide': 'warn',
        'invalid': 'warn',
        'under': 'ignore',
    }

    # The errstate should be unchanged after that import.
    current = np.geterr()
    tm.assert_equal(current, defaults)
def test_index_column(self):
    """``read_gbq`` must name the index after the ``index_col`` argument."""
    query = "SELECT 'a' as STRING_1, 'b' as STRING_2"

    result_frame = gbq.read_gbq(
        query,
        project_id=_get_project_id(),
        index_col="STRING_1",
        private_key=_get_private_key_path(),
    )

    expected_columns = {'STRING_1': ['a'], 'STRING_2': ['b']}
    correct_frame = DataFrame(expected_columns).set_index("STRING_1")

    tm.assert_equal(result_frame.index.name, correct_frame.index.name)
def test_frame_empty(self):
    """Empty frames survive a JSON round trip and serialise predictably."""
    df = DataFrame(columns=['jim', 'joe'])
    self.assertFalse(df._is_mixed_type)

    roundtripped = read_json(df.to_json(), dtype=dict(df.dtypes))
    assert_frame_equal(roundtripped, df, check_index_type=False)

    # GH 7445
    empty_with_column = pd.DataFrame({'test': []}, index=[])
    result = empty_with_column.to_json(orient='columns')
    tm.assert_equal(result, '{"test":{}}')
def test_plot_samples(pca, kwargs):
    """``plot_samples`` draws ``n_vectors + 1`` lines on the current axes.

    Figures are closed in a ``finally`` clause so that a failing assertion
    does not leave them open.
    """
    from flotilla.visualize.decomposition import DecompositionViz

    try:
        dv = DecompositionViz(pca.reduced_space, pca.components_,
                              pca.explained_variance_ratio_, **kwargs)
        dv.plot_samples()

        ax = plt.gca()
        pdt.assert_equal(len(ax.lines), kwargs['n_vectors'] + 1)
    finally:
        plt.close('all')
def test_plot_loadings(pca, kwargs):
    """``plot_loadings`` adds exactly one collection to the current axes.

    Figures are closed in a ``finally`` clause so that a failing assertion
    does not leave them open.
    """
    from flotilla.visualize.decomposition import DecompositionViz

    try:
        dv = DecompositionViz(pca.reduced_space, pca.components_,
                              pca.explained_variance_ratio_, **kwargs)
        dv.plot_loadings()

        ax = plt.gca()
        pdt.assert_equal(len(ax.collections), 1)
    finally:
        plt.close('all')
def test_repeat_preserves_tz(self):
    """``DatetimeArray.repeat`` keeps dtype (incl. tz) and values."""
    dti = pd.date_range('2000', periods=2, freq='D', tz='US/Central')
    arr = DatetimeArray(dti)

    # preserves tz and values, but not freq
    expected = DatetimeArray(arr.asi8, freq=None, dtype=arr.dtype)

    repeated = arr.repeat([1, 1])
    tm.assert_equal(repeated, expected)
def test_pow_float(self, op, idx, box):
    """Power with a float scalar matches the same op on the raw values."""
    # test power calculations both ways, GH#14973
    exponent = 2.0
    expected = tm.box_expected(pd.Float64Index(op(idx.values, exponent)),
                               box)
    boxed = tm.box_expected(idx, box)

    result = op(boxed, exponent)
    tm.assert_equal(result, expected)
def test_nmf_space_ylabel(self, splicing, groupby):
    """The y label is the included label iff the x axis is the excluded one."""
    test_ylabel = splicing._nmf_space_ylabel(groupby)

    x_is_excluded = splicing._is_nmf_space_x_axis_excluded(groupby)
    true_ylabel = (splicing.included_label if x_is_excluded
                   else splicing.excluded_label)

    pdt.assert_equal(test_ylabel, true_ylabel)
def test_modulo(self, idx, box): # GH#9244 expected = Index(idx.values % 2) idx = tm.box_expected(idx, box) expected = tm.box_expected(expected, box) result = idx % 2 tm.assert_equal(result, expected)
def test_array_unboxes(box):
    """``pd.array`` builds the array even when given a boxed container.

    Calling ``DecimalArray2._from_sequence`` on the boxed data directly is
    expected to raise ``TypeError``; ``pd.array`` on the same data must
    match the result built from ``data.values``.
    """
    data = box([decimal.Decimal('1'), decimal.Decimal('2')])

    # make sure it works
    with pytest.raises(TypeError):
        DecimalArray2._from_sequence(data)

    expected = DecimalArray2._from_sequence(data.values)
    result = pd.array(data, dtype='decimal2')
    tm.assert_equal(result, expected)
def test_objarr_add_str(self, box): ser = pd.Series(['x', np.nan, 'x']) expected = pd.Series(['xa', np.nan, 'xa']) ser = tm.box_expected(ser, box) expected = tm.box_expected(expected, box) result = ser + 'a' tm.assert_equal(result, expected)
def test_guess_modality(self, event, estimator):
    """``_guess_modality`` picks the best entry once 'uniform' is added.

    The reference answer is the ``idxmax`` of the log-sum-exp values after
    a 'uniform' entry equal to ``estimator.logbf_thresh`` has been set.
    """
    logsumexps = estimator._logsumexp(estimator._loglik(event))
    guessed = estimator._guess_modality(logsumexps)

    # Add the 'uniform' entry only after the call under test has run.
    logsumexps['uniform'] = estimator.logbf_thresh
    reference = logsumexps.idxmax()

    pdt.assert_equal(guessed, reference)
def test_uint_index_does_not_convert_to_float64():
    """``.loc`` with large uint64 labels must keep a uint64 index."""
    # https://github.com/pandas-dev/pandas/issues/28279
    # https://github.com/pandas-dev/pandas/issues/28023
    unique_label = 7606741985629028552
    repeated_label = 17876870360202815256

    series = pd.Series(
        [0, 1, 2, 3, 4, 5],
        index=[
            unique_label,
            repeated_label,
            repeated_label,
            13106359306506049338,
            8991270399732411471,
            8991270399732411472,
        ],
    )

    result = series.loc[[unique_label, repeated_label]]

    expected_index = UInt64Index(
        [unique_label, repeated_label, repeated_label],
        dtype="uint64",
    )
    tm.assert_index_equal(result.index, expected_index)
    tm.assert_equal(result, series[:3])
def assert_invalid_comparison(left, right, box):
    """
    Assert that comparison operations with mismatched types behave correctly.

    ``==`` must be all-False and ``!=`` all-True in both operand orders,
    while every ordering comparison must raise ``TypeError``.

    Parameters
    ----------
    left : np.ndarray, ExtensionArray, Index, or Series
    right : object
    box : {pd.DataFrame, pd.Series, pd.Index, tm.to_array}
    """
    # Not for tznaive-tzaware comparison

    # Note: not quite the same as how we do this for tm.box_expected
    xbox = np.array if box is Index else box

    eq_result = left == right
    all_false = xbox(np.zeros(eq_result.shape, dtype=np.bool_))
    tm.assert_equal(eq_result, all_false)
    tm.assert_equal(right == left, all_false)

    tm.assert_equal(left != right, ~all_false)
    tm.assert_equal(right != left, ~all_false)

    msg = "Invalid comparison between"
    ordering_ops = (
        lambda a, b: a < b,
        lambda a, b: a <= b,
        lambda a, b: a > b,
        lambda a, b: a >= b,
    )
    for first, second in ((left, right), (right, left)):
        for compare in ordering_ops:
            with pytest.raises(TypeError, match=msg):
                compare(first, second)
def test__init(self, expression_data_no_na, outliers):
    """A freshly built ``BaseData`` exposes the data, outliers and managers."""
    from flotilla.data_model.base import BaseData
    from flotilla.compute.predict import PredictorConfigManager, \
        PredictorDataSetManager

    base_data = BaseData(expression_data_no_na, outliers=outliers)

    if outliers is not None:
        outlier_samples = outliers.copy()
    else:
        outlier_samples = []
    outliers_df = expression_data_no_na.ix[outlier_samples]

    # By default every feature is renamed to itself.
    columns = expression_data_no_na.columns
    feature_renamer_series = pd.Series(columns, index=columns)

    pdt.assert_frame_equal(base_data.data_original, expression_data_no_na)
    pdt.assert_equal(base_data.feature_data, None)
    pdt.assert_frame_equal(base_data.data, expression_data_no_na)
    pdt.assert_series_equal(base_data.feature_renamer_series,
                            feature_renamer_series)
    pdt.assert_frame_equal(base_data.outliers, outliers_df)
    pdt.assert_numpy_array_equal(base_data.outlier_samples, outlier_samples)

    assert isinstance(base_data.predictor_config_manager,
                      PredictorConfigManager)
    assert isinstance(base_data.predictor_dataset_manager,
                      PredictorDataSetManager)
def test_read_write_dta11(self):
    """Writing invalid Stata column names warns once and renames them."""
    row = [(1, 2, 3, 4)]
    original_columns = ['good', compat.u('b\u00E4d'), '8number',
                        'astringwithmorethan32characters______']
    formatted_columns = ['good', 'b_d', '_8number',
                         'astringwithmorethan32characters_']

    original = DataFrame(row, columns=original_columns)
    formatted = DataFrame(row, columns=formatted_columns)
    formatted.index.name = 'index'
    formatted = formatted.astype(np.int32)

    with tm.ensure_clean() as path:
        with warnings.catch_warnings(record=True) as recorded:
            original.to_stata(path, None)
            # should get a warning for that format.
            tm.assert_equal(len(recorded), 1)

        written_and_read_again = self.read_dta(path)

    tm.assert_frame_equal(written_and_read_again.set_index('index'),
                          formatted)
def test_compare_nucleotide_seqs(nucleotide_seq1, nucleotide_seq2, ksizes):
    """Compare two nucleotide sequences and check the result table.

    The expected table is parsed from inline CSV, one record per line.
    """
    from orpheum.compare_kmer_content import compare_nucleotide_seqs

    first = ("seq1", nucleotide_seq1)
    second = ("seq2", nucleotide_seq2)
    test = compare_nucleotide_seqs(first, second, ksizes)

    expected_csv = """id1,id2,ksize,jaccard,alphabet
seq1,seq2,2,1.0,purine_pyrimidine
seq1,seq2,3,0.8,purine_pyrimidine
seq1,seq2,4,0.25,purine_pyrimidine
seq1,seq2,2,1.0,nucleotide
seq1,seq2,3,0.8,nucleotide
seq1,seq2,4,0.25,nucleotide
seq1,seq2,2,1.0,weak_strong
seq1,seq2,3,1.0,weak_strong
seq1,seq2,4,1.0,weak_strong
seq1,seq2,2,1.0,amino_keto
seq1,seq2,3,1.0,amino_keto
seq1,seq2,4,0.75,amino_keto
"""
    true = pd.read_csv(StringIO(expected_csv))
    pdt.assert_equal(test, true)
def test_plot_loadings_scatter(pca, kwargs):
    """Scatter loadings: three axes in the figure, one collection per axis.

    Figures are closed in a ``finally`` clause so that a failing assertion
    does not leave them open.
    """
    from flotilla.visualize.decomposition import DecompositionViz

    try:
        dv = DecompositionViz(pca.reduced_space, pca.components_,
                              pca.explained_variance_ratio_, **kwargs)
        dv.plot(plot_loadings='scatter')

        pdt.assert_equal(len(dv.fig_reduced.axes), 3)
        pdt.assert_equal(len(dv.ax_loading1.collections), 1)
        # The original asserted on ax_loading1 twice, so the second loading
        # axis was never checked.
        # NOTE(review): assumes DecompositionViz exposes ``ax_loading2``
        # alongside ``ax_loading1`` — confirm against the class.
        pdt.assert_equal(len(dv.ax_loading2.collections), 1)
    finally:
        plt.close('all')
def test_td64arr_add_sub_tdi(self, box, names):
    """TimedeltaIndex +/- boxed timedeltas: values, names and dtype."""
    # GH#17250 make sure result dtype is correct
    # GH#19043 make sure names are propagated correctly

    def assert_td64_dtype(obj):
        # DataFrames expose per-column dtypes instead of a single dtype.
        if box is not pd.DataFrame:
            assert obj.dtype == 'timedelta64[ns]'
        else:
            assert obj.dtypes[0] == 'timedelta64[ns]'

    tdi = TimedeltaIndex(['0 days', '1 day'], name=names[0])
    ser = Series([Timedelta(hours=3), Timedelta(hours=4)], name=names[1])
    added = Series([Timedelta(hours=3), Timedelta(days=1, hours=4)],
                   name=names[2])

    ser = tm.box_expected(ser, box)
    added = tm.box_expected(added, box)

    result = tdi + ser
    tm.assert_equal(result, added)
    assert_td64_dtype(result)

    result = ser + tdi
    tm.assert_equal(result, added)
    assert_td64_dtype(result)

    subtracted = Series([Timedelta(hours=-3), Timedelta(days=1, hours=-4)],
                        name=names[2])
    subtracted = tm.box_expected(subtracted, box)

    result = tdi - ser
    tm.assert_equal(result, subtracted)
    assert_td64_dtype(result)

    # Reversing the operands flips the sign of the difference.
    result = ser - tdi
    tm.assert_equal(result, -subtracted)
    assert_td64_dtype(result)
def test_td64arr_mul_numeric_scalar(self, box, one, tdser): # GH#4521 # divide/multiply by integers expected = Series(['-59 Days', '-59 Days', 'NaT'], dtype='timedelta64[ns]') tdser = tm.box_expected(tdser, box) expected = tm.box_expected(expected, box) result = tdser * (-one) tm.assert_equal(result, expected) result = (-one) * tdser tm.assert_equal(result, expected) expected = Series(['118 Days', '118 Days', 'NaT'], dtype='timedelta64[ns]') expected = tm.box_expected(expected, box) result = tdser * (2 * one) tm.assert_equal(result, expected) result = (2 * one) * tdser tm.assert_equal(result, expected)
def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other):
    """Period data +/- ``other`` gives all-NaT; ``other - obj`` raises."""
    pi = pd.period_range("1994-04-01", periods=9, freq="19D")
    all_nat = pd.PeriodIndex(["NaT"] * 9, freq="19D")

    obj = tm.box_expected(pi, box_with_array)
    all_nat = tm.box_expected(all_nat, box_with_array)

    tm.assert_equal(obj + other, all_nat)
    tm.assert_equal(other + obj, all_nat)
    tm.assert_equal(obj - other, all_nat)

    with pytest.raises(TypeError):
        other - obj
def test___init__(self, junction_exon_triples, graph_items):
    """``SpliceGraph`` exposes the unique exons/junctions and a matching graph."""
    from outrigger.index.events import SpliceGraph

    test = SpliceGraph(junction_exon_triples)
    graph, items = graph_items

    unique_exons = tuple(junction_exon_triples.exon.unique())
    unique_junctions = tuple(junction_exon_triples.junction.unique())

    pdt.assert_equal(test.exons, unique_exons)
    pdt.assert_equal(test.junctions, unique_junctions)
    # Item order is not significant, so compare sorted copies.
    pdt.assert_equal(sorted(test.items), sorted(items))

    assert_graph_items_equal(test.graph, test.items, graph, items)
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
    """Round-trip a scipy sparse matrix through ``SparseDataFrame``."""
    # GH 4343
    tm.skip_if_no_package('scipy')

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)
    try:
        spm = spmatrix(arr)
        assert spm.dtype == arr.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    sdf = pd.SparseDataFrame(spm, index=index, columns=columns,
                             default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast to something generic
    # and except later on
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    unfilled = pd.SparseDataFrame(rarr, index=index, columns=columns)
    expected = unfilled.fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    tm.assert_equal(dict(sdf.to_coo().todok()), dict(spm.todok()))

    # Ensure dtype is preserved if possible
    was_upcast = ((fill_value is None or is_float(fill_value)) and
                  not (is_object_dtype(dtype) or is_float_dtype(dtype)))
    if is_bool_dtype(dtype):
        res_dtype = bool
    elif was_upcast:
        res_dtype = float
    else:
        res_dtype = dtype
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    tm.assert_equal(sdf.to_coo().dtype, res_dtype)

    # However, adding a str column results in an upcast to object
    sdf['strings'] = np.arange(len(sdf)).astype(str)
    tm.assert_equal(sdf.to_coo().dtype, np.object_)
def test_td64arr_add_sub_timestamp(self, box):
    """Timestamp +/- boxed timedeltas gives dates; the reverse sub raises."""
    # GH#11925
    ts = Timestamp('2012-01-01')
    # TODO: parametrize over types of datetime scalar?

    tdser = tm.box_expected(
        Series(pd.timedelta_range('1 day', periods=3)), box)
    later = tm.box_expected(
        Series(pd.date_range('2012-01-02', periods=3)), box)

    tm.assert_equal(ts + tdser, later)
    tm.assert_equal(tdser + ts, later)

    earlier = tm.box_expected(
        Series(pd.date_range('2011-12-31', periods=3, freq='-1D')), box)

    tm.assert_equal(ts - tdser, earlier)
    tm.assert_equal(ts + (-tdser), earlier)

    with pytest.raises(TypeError):
        tdser - ts
def test_parr_add_sub_td64_nat(self, box):
    """Period data +/- ``timedelta64('NaT')`` gives all-NaT periods."""
    # GH#23320 special handling for timedelta64("NaT")
    other = np.timedelta64("NaT")

    pi = pd.period_range("1994-04-01", periods=9, freq="19D")
    obj = tm.box_expected(pi, box)
    all_nat = tm.box_expected(pd.PeriodIndex(["NaT"] * 9, freq="19D"), box)

    tm.assert_equal(obj + other, all_nat)
    tm.assert_equal(other + obj, all_nat)
    tm.assert_equal(obj - other, all_nat)

    with pytest.raises(TypeError):
        other - obj
def test_parr_add_sub_tdt64_nat_array(self, box_df_fail, other):
    """Period data +/- ``other`` gives all-NaT; ``other - obj`` raises."""
    # FIXME: DataFrame fails because when operating column-wise
    #  timedelta64 entries become NaT and are treated like datetimes
    box = box_df_fail

    pi = pd.period_range("1994-04-01", periods=9, freq="19D")
    all_nat = pd.PeriodIndex(["NaT"] * 9, freq="19D")

    obj = tm.box_expected(pi, box)
    all_nat = tm.box_expected(all_nat, box)

    for result in (obj + other, other + obj, obj - other):
        tm.assert_equal(result, all_nat)

    with pytest.raises(TypeError):
        other - obj
def test_subclass_sparse_slice(self):
    """Slicing a sparse subclass keeps the subclass and its attributes."""
    rows = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
    ssdf = tm.SubclassedSparseDataFrame(rows)
    ssdf.testattr = "testattr"

    # The .loc slice is compared with three rows, the others with two.
    first_three = tm.SubclassedSparseDataFrame(rows[:3])
    tm.assert_sp_frame_equal(ssdf.loc[:2], first_three)
    tm.assert_sp_frame_equal(ssdf.iloc[:2],
                             tm.SubclassedSparseDataFrame(rows[:2]))
    tm.assert_sp_frame_equal(ssdf[:2],
                             tm.SubclassedSparseDataFrame(rows[:2]))

    # The custom attribute must be present on every sliced result.
    tm.assert_equal(ssdf.loc[:2].testattr, "testattr")
    tm.assert_equal(ssdf.iloc[:2].testattr, "testattr")
    tm.assert_equal(ssdf[:2].testattr, "testattr")

    second_row = rows[1]
    tm.assert_sp_series_equal(ssdf.loc[1],
                              tm.SubclassedSparseSeries(second_row),
                              check_names=False)
    tm.assert_sp_series_equal(ssdf.iloc[1],
                              tm.SubclassedSparseSeries(second_row),
                              check_names=False)
def test_init(self, expression_data_no_na, expression_log_base,
              expression_plus_one, expression_thresh):
    """``ExpressionData`` applies plus-one and log transforms as configured."""
    from flotilla.data_model import ExpressionData

    expression = ExpressionData(expression_data_no_na.copy(),
                                log_base=expression_log_base,
                                plus_one=expression_plus_one,
                                thresh=expression_thresh)

    # Rebuild the expected data and threshold with the same transforms.
    expected_data = expression_data_no_na.copy()
    expected_thresh = float(expression_thresh)
    if expression_plus_one:
        expected_data += 1
        expected_thresh += 1

    if expression_log_base is not None:
        # Change of base: log_b(x) = ln(x) / ln(b).
        expected_data = np.divide(np.log(expected_data),
                                  np.log(expression_log_base))

    pdt.assert_equal(expression.plus_one, expression_plus_one)
    pdt.assert_equal(expression.log_base, expression_log_base)
    pdt.assert_equal(expression.thresh, expected_thresh)
    pdt.assert_frame_equal(expression.data_original, expression_data_no_na)
    pdt.assert_frame_equal(expression.data, expected_data)
def test_arith_zero_dim_ndarray(self, other):
    """Adding ``np.array(other)`` behaves like adding ``other`` itself."""
    arr = integer_array([1, None, 2])

    with_ndarray = arr + np.array(other)
    with_scalar = arr + other
    tm.assert_equal(with_ndarray, with_scalar)
def test_constructor_unwraps_index(self, indices):
    """Re-wrapping an index in its own type yields equal ``_data``."""
    if isinstance(indices, pd.MultiIndex):
        raise pytest.skip("MultiIndex has no ._data")

    rewrapped = type(indices)(indices)
    tm.assert_equal(indices._data, rewrapped._data)
def _validate_periodindex(self, pickled, current):
    """Check an unpickled monthly PeriodIndex against the current one."""
    tm.assert_index_equal(pickled, current)

    # The frequency must come back as a real MonthEnd offset.
    pickled_freq = pickled.freq
    tm.assertIsInstance(pickled_freq, MonthEnd)
    tm.assert_equal(pickled_freq, MonthEnd())
    tm.assert_equal(pickled.freqstr, 'M')

    tm.assert_index_equal(pickled.shift(2), current.shift(2))
def test_parr_cmp_period_scalar(self, freq, box_with_array): # GH#13200 xbox = np.ndarray if box_with_array is pd.Index else box_with_array base = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], freq=freq) base = tm.box_expected(base, box_with_array) per = Period('2011-02', freq=freq) exp = np.array([False, True, False, False]) exp = tm.box_expected(exp, xbox) tm.assert_equal(base == per, exp) tm.assert_equal(per == base, exp) exp = np.array([True, False, True, True]) exp = tm.box_expected(exp, xbox) tm.assert_equal(base != per, exp) tm.assert_equal(per != base, exp) exp = np.array([False, False, True, True]) exp = tm.box_expected(exp, xbox) tm.assert_equal(base > per, exp) tm.assert_equal(per < base, exp) exp = np.array([True, False, False, False]) exp = tm.box_expected(exp, xbox) tm.assert_equal(base < per, exp) tm.assert_equal(per > base, exp) exp = np.array([False, True, True, True]) exp = tm.box_expected(exp, xbox) tm.assert_equal(base >= per, exp) tm.assert_equal(per <= base, exp) exp = np.array([True, True, False, False]) exp = tm.box_expected(exp, xbox) tm.assert_equal(base <= per, exp) tm.assert_equal(per >= base, exp)
def test_parr_cmp_pi(self, freq, box_with_array): # GH#13200 xbox = np.ndarray if box_with_array is pd.Index else box_with_array base = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], freq=freq) base = tm.box_expected(base, box_with_array) # TODO: could also box idx? idx = PeriodIndex(['2011-02', '2011-01', '2011-03', '2011-05'], freq=freq) exp = np.array([False, False, True, False]) exp = tm.box_expected(exp, xbox) tm.assert_equal(base == idx, exp) exp = np.array([True, True, False, True]) exp = tm.box_expected(exp, xbox) tm.assert_equal(base != idx, exp) exp = np.array([False, True, False, False]) exp = tm.box_expected(exp, xbox) tm.assert_equal(base > idx, exp) exp = np.array([True, False, False, True]) exp = tm.box_expected(exp, xbox) tm.assert_equal(base < idx, exp) exp = np.array([False, True, True, False]) exp = tm.box_expected(exp, xbox) tm.assert_equal(base >= idx, exp) exp = np.array([True, False, True, True]) exp = tm.box_expected(exp, xbox) tm.assert_equal(base <= idx, exp)
def test_ufunc_coercions(self, holder):
    """Float-producing ufuncs and operators give float64 results.

    Each operation must return the same kind of container (Series or
    Index), keep the name and have dtype ``f8``.
    """
    idx = holder([1, 2, 3, 4, 5], name="x")
    box = pd.Series if holder is pd.Series else pd.Index

    halves = [0.5, 1.0, 1.5, 2.0, 2.5]
    # Each entry: (callable producing the result, expected values).
    cases = [
        (lambda: np.sqrt(idx), np.sqrt(np.array([1, 2, 3, 4, 5]))),
        (lambda: np.divide(idx, 2.0), halves),
        # _evaluate_numeric_binop
        (lambda: idx + 2.0, [3.0, 4.0, 5.0, 6.0, 7.0]),
        (lambda: idx - 2.0, [-1.0, 0.0, 1.0, 2.0, 3.0]),
        (lambda: idx * 1.0, [1.0, 2.0, 3.0, 4.0, 5.0]),
        (lambda: idx / 2.0, halves),
    ]

    for compute, values in cases:
        result = compute()
        assert result.dtype == "f8" and isinstance(result, box)
        exp = tm.box_expected(pd.Float64Index(values, name="x"), box)
        tm.assert_equal(result, exp)
def compare_series_ts(result, expected, typ, version):
    """Compare a time series with its reference, including index frequency.

    ``typ`` and ``version`` are accepted but not used by the body.
    """
    # GH 7748
    tm.assert_series_equal(result, expected)
    tm.assert_equal(result.index.freq, expected.index.freq)
    tm.assert_equal(result.index.freq.normalize, False)
    tm.assert_series_equal(result > 0, expected > 0)

    # GH 9291
    freq = result.index.freq
    tm.assert_equal(freq + Day(1), Day(2))

    # Adding a Timedelta to the offset must produce a Timedelta.
    for extra in ({'hours': 1}, {'nanoseconds': 1}):
        res = freq + pandas.Timedelta(**extra)
        tm.assert_equal(isinstance(res, pandas.Timedelta), True)
        tm.assert_equal(res, pandas.Timedelta(days=1, **extra))
def test_ufunc_coercions(self, holder):
    """Verify float64 coercion for ufuncs and float-scalar arithmetic.

    The object under test is ``holder([1, 2, 3, 4, 5], name='x')``.  Each
    operation must return dtype ``'f8'`` in a ``pd.Series`` if ``holder`` is
    ``pd.Series`` and in a ``pd.Index`` otherwise.
    """
    idx = holder([1, 2, 3, 4, 5], name='x')
    box = pd.Series if holder is pd.Series else pd.Index

    # (operation, expected values); operations are wrapped in lambdas so each
    # one only runs once the previous case has been verified.
    cases = [
        (lambda: np.sqrt(idx), np.sqrt(np.array([1, 2, 3, 4, 5]))),
        (lambda: np.divide(idx, 2.), [0.5, 1., 1.5, 2., 2.5]),
        # _evaluate_numeric_binop
        (lambda: idx + 2., [3., 4., 5., 6., 7.]),
        (lambda: idx - 2., [-1., 0., 1., 2., 3.]),
        (lambda: idx * 1., [1., 2., 3., 4., 5.]),
        (lambda: idx / 2., [0.5, 1., 1.5, 2., 2.5]),
    ]

    for operation, values in cases:
        result = operation()
        assert result.dtype == 'f8' and isinstance(result, box)
        exp = pd.Float64Index(values, name='x')
        tm.assert_equal(result, tm.box_expected(exp, box))
def test_mul_int_identity(self, op, numeric_idx, box):
    """Applying ``op`` with the integer 1 must leave the boxed index unchanged."""
    boxed = tm.box_expected(numeric_idx, box)
    tm.assert_equal(op(boxed, 1), boxed)
def test_detect_intraday(self, positions, transactions, expected):
    """``detect_intraday`` with ``threshold=0.25`` must equal ``expected``."""
    assert_equal(
        detect_intraday(positions, transactions, threshold=0.25),
        expected,
    )
def _assert_same_values_and_dtype(res, exp): tm.assert_equal(res.dtype, exp.dtype) tm.assert_almost_equal(res, exp)
def test_estimate_intraday(self, returns, positions, transactions, expected):
    """The shape of the ``estimate_intraday`` result must equal ``expected``."""
    estimated_shape = estimate_intraday(returns, positions, transactions).shape
    assert_equal(estimated_shape, expected)
def compare_index_period(result, expected, typ, version):
    """Compare a monthly period index against its expected counterpart.

    Checks that the two indexes are equal, that ``result.freq`` is a
    ``MonthEnd`` offset equal to ``MonthEnd()`` with ``freqstr`` ``'M'``, and
    that shifting both indexes by 2 still yields equal indexes.
    ``typ`` and ``version`` are accepted but not used in this function.

    Raises
    ------
    AssertionError
        If any of the comparisons fails.
    """
    tm.assert_index_equal(result, expected)
    # Plain assert instead of the unittest-style ``tm.assertIsInstance``
    # helper, which current pandas testing utilities do not provide.
    assert isinstance(result.freq, MonthEnd)
    tm.assert_equal(result.freq, MonthEnd())
    tm.assert_equal(result.freqstr, 'M')
    tm.assert_index_equal(result.shift(2), expected.shift(2))
def test_null_transformer2(data):
    """Exercise ``NullTransformer`` with ``impute_algorithm="ts_interpolate"``.

    Covers the default parameters, a two-iteration run with
    ``impute_all=False``, and the same run with ``impute_all=True``.
    """

    def two_iteration_params():
        # Build a fresh dict per transformer, as each one gets its own copy.
        return dict(orders=[1], agg_func=np.nanmean, iter_num=2)

    # Expected (initial, final) missing counts per column; the same values
    # are asserted for both two-iteration runs below.
    missing_counts = {"a": (1, 0), "b": (3, 1), "c": (1, 0), "d": (1, 1)}
    expected_missing_info = {
        column: {"initial_missing_num": initial, "final_missing_num": final}
        for column, (initial, final) in missing_counts.items()
    }

    # default parameters
    default_transform = NullTransformer(
        impute_algorithm="ts_interpolate",
        impute_all=False)
    default_transform.fit(data)
    assert default_transform.impute_params == dict(
        orders=[7, 14, 21],
        agg_func=np.mean,
        iter_num=5)
    # `orders` is too large for this dataset, nothing is imputed
    assert_equal(default_transform.transform(data), data)

    # two iterations
    partial_transform = NullTransformer(
        impute_algorithm="ts_interpolate",
        impute_params=two_iteration_params(),
        impute_all=False)
    partial_result = partial_transform.fit_transform(data)
    partial_expected = pd.DataFrame({
        "a": (0.0, 0.0, -1.0, 1.0),
        "b": (np.nan, 2.0, 2.0, 2.0),
        "c": (2.0, 3.0, 3.0, 9.0),
        "d": (np.nan, 4.0, -4.0, 16.0),
    })
    assert_equal(partial_result, partial_expected)
    assert partial_transform.missing_info == expected_missing_info

    # impute_all=True
    full_transform = NullTransformer(
        impute_algorithm="ts_interpolate",
        impute_params=two_iteration_params(),
        impute_all=True)
    full_result = full_transform.fit_transform(data)
    full_expected = pd.DataFrame({
        "a": (0.0, 0.0, -1.0, 1.0),
        "b": (2.0, 2.0, 2.0, 2.0),
        "c": (2.0, 3.0, 3.0, 9.0),
        "d": (4.0, 4.0, -4.0, 16.0),
    })
    assert_equal(full_result, full_expected)
    # `final_missing_num` are filled in by the second pass.
    # The counts reflect the first pass.
    assert full_transform.missing_info == expected_missing_info