def test_equals(arr, idx): s1 = Series(arr, index=idx) s2 = s1.copy() assert s1.equals(s2) s1[1] = 9 assert not s1.equals(s2)
def test_equals_list_array(val): # GH20676 Verify equals operator for list of Numpy arrays arr = np.array([1, 2]) s1 = Series([arr, arr]) s2 = s1.copy() assert s1.equals(s2) s1[1] = val cm = (tm.assert_produces_warning(FutureWarning, check_stacklevel=False) if isinstance(val, str) else nullcontext()) with cm: assert not s1.equals(s2)
def test_equals_None_vs_float():
    # GH#44190
    numeric = Series(
        [-np.inf, np.nan, -1.0, 0.0, 1.0, 10 / 3, np.inf], dtype=object
    )
    nones = Series([None] * len(numeric))
    # These once compared equal due to a bug; every container combination,
    # in both argument orders, must report them as different.
    for a, b in ((numeric, nones), (nones, numeric)):
        assert not a.equals(b)
        assert not a.to_frame().equals(b.to_frame())
        assert not Index(a, dtype="object").equals(Index(b, dtype="object"))
def test_set_alpha_with_array(self):
    """Assigning a plain list to ``p`` is exposed again as a labelled Series."""
    dist = Multinomial(n=10, p=[0.1, 0.2, 0.3, 0.4])
    want = Series({'p1': 0.4, 'p2': 0.3, 'p3': 0.2, 'p4': 0.1})
    dist.p = [0.4, 0.3, 0.2, 0.1]
    self.assertTrue(want.equals(dist.p))
def test_calc_hash_without_hashing_for_single_column(self):
    """A value without a hash wrapper passes through calc_hash unchanged."""
    source = Series(['1000'])
    result = self.dbt_test_utils.calc_hash(source)
    assert Series(['1000']).equals(result)
def test_set_alpha_with_series(self):
    """Assigning a Series to ``p`` stores it with labels intact."""
    dist = Multinomial(n=10, p=[0.1, 0.2, 0.3, 0.4])
    want = Series({'x1': 0.4, 'x2': 0.3, 'x3': 0.2, 'x4': 0.1})
    dist.p = want
    self.assertTrue(want.equals(dist.p))
def test_calc_hash_for_single_md5(self):
    """calc_hash resolves a single md5(...) expression to its digest."""
    source = Series(["md5('1000')"])
    result = self.dbt_test_utils.calc_hash(source)
    assert Series(['A9B7BA70783B617E9998DC4DD82EB3C5']).equals(result)
def test_set_alpha_with_array(self):
    """Assigning a plain list to ``alpha`` is exposed as a labelled Series."""
    dist = Dirichlet([0.1, 0.2, 0.3, 0.4])
    want = Series({'α1': 0.4, 'α2': 0.3, 'α3': 0.2, 'α4': 0.1})
    dist.alpha = [0.4, 0.3, 0.2, 0.1]
    self.assertTrue(want.equals(dist.alpha))
def test_equals_false_negative():
    # GH8437 Verify false negative behavior of equals function for dtype object
    s1 = Series([False, np.nan])
    s2 = s1.copy()
    empty = Series(index=range(2), dtype=object)
    s3, s4, s5, s6 = (empty.copy() for _ in range(4))
    # Fill the object-dtype series so each ends up holding [False, nan],
    # written through different (slice vs scalar) setitem paths.
    s3[:-1] = False
    s4[:-1] = False
    s5[0] = False
    s6[0] = False
    assert s1.equals(s1)
    assert s1.equals(s2)
    assert s1.equals(s3)
    assert s1.equals(s4)
    assert s1.equals(s5)
    assert s5.equals(s6)
def test_equals_none_vs_nan():
    # GH#39650
    with_none = Series([1, None], dtype=object)
    with_nan = Series([1, np.nan], dtype=object)
    # None and NaN are interchangeable NAs for object dtype at every level:
    # Series, Index and the backing extension array.
    assert with_none.equals(with_nan)
    assert Index(with_none, dtype=with_none.dtype).equals(
        Index(with_nan, dtype=with_nan.dtype)
    )
    assert with_none.array.equals(with_nan.array)
def test_counts(self):
    """counts() totals, unfiltered and conditioned on answers via any/all."""
    full = Series({'apples': 3, 'bananas': 2, 'cherries': 1})
    minus_one_apple = Series({'apples': 2, 'bananas': 2, 'cherries': 1})
    self.assertTrue(full.equals(self.question.counts()))
    # Single-answer condition under both 'any' and 'all' semantics.
    self.assertTrue(full.equals(self.question.counts('apples', 'any')))
    self.assertTrue(
        minus_one_apple.equals(self.question.counts('bananas', 'any')))
    self.assertTrue(full.equals(self.question.counts('apples', 'all')))
    self.assertTrue(
        minus_one_apple.equals(self.question.counts('bananas', 'all')))
    # Multi-answer condition: 'any' vs 'all' diverge.
    self.assertTrue(minus_one_apple.equals(
        self.question.counts(['bananas', 'cherries'], 'any')
    ))
    self.assertTrue(Series({'apples': 1, 'bananas': 1, 'cherries': 1}).equals(
        self.question.counts(['bananas', 'cherries'], 'all')
    ))
def test_calc_hash_for_single_sha256(self):
    """calc_hash resolves a single sha(...) expression to its digest."""
    source = Series(["sha('1000')"])
    want = Series([
        '40510175845988F13F6162ED8526F0B09F73384467FA855E1E79B44A56562A58'
    ])
    assert want.equals(self.dbt_test_utils.calc_hash(source))
def test_equals_mismatched_nas(nulls_fixture, nulls_fixture2): # GH#39650 left = nulls_fixture right = nulls_fixture2 if hasattr(right, "copy"): right = right.copy() else: right = copy.copy(right) ser = Series([left], dtype=object) ser2 = Series([right], dtype=object) if is_matching_na(left, right): assert ser.equals(ser2) elif (left is None and is_float(right)) or (right is None and is_float(left)): assert ser.equals(ser2) else: assert not ser.equals(ser2)
def test_equals_matching_nas():
    # Matching but not identical NA objects compare equal at every level.
    # Each factory is called twice so left and right hold distinct objects.
    for make_na in (
        lambda: np.datetime64("NaT"),
        lambda: np.timedelta64("NaT"),
        lambda: np.float64("NaN"),
    ):
        left = Series([make_na()], dtype=object)
        right = Series([make_na()], dtype=object)
        assert left.equals(right)
        assert Index(left).equals(Index(right))
        assert left.array.equals(right.array)
def test_sorting(transf_type: str, ts_input: Series, ts_correct: Series) -> None:
    """
    Check that sorting the input timeseries reproduces the expected output.

    :param transf_type: String. FP for "fp" (fit-predict), P for "p" (predict).
    :param ts_input: Series. Timeseries to be transformed.
    :param ts_correct: Series. Expected result of the transformation.
    """
    assert ts_correct.equals(_sort(transf_type, ts_input))
def test_min_value_validator():
    """MinValueValidator flags values below the minimum and records the input.

    Fix: replace PEP 8 E712/E711 comparisons (``== False``, ``== True``,
    ``== None``) with truth-value asserts and ``is None`` — ``value == None``
    would compare elementwise if ``value`` ever held a Series.
    """
    series = Series([1, 2, 3, 4, 5], dtype='int')

    # Minimum of 2: the series contains 1, so validation fails.
    failing = MinValueValidator(2)
    assert not failing(series)
    assert series.equals(failing.value)
    failing.clear()
    assert failing.value is None

    # Minimum of 1: every element qualifies.
    passing = MinValueValidator(1)
    assert passing(series)
    assert series.equals(passing.value)
    passing.clear()
    assert passing.value is None
def test_max_value_validator():
    """MaxValueValidator flags values above the maximum and records the input.

    Fix: replace PEP 8 E712/E711 comparisons (``== False``, ``== True``,
    ``== None``) with truth-value asserts and ``is None`` — ``value == None``
    would compare elementwise if ``value`` ever held a Series.
    """
    series = Series([1, 2, 3, 4, 5], dtype='int')

    # Maximum of 4: the series contains 5, so validation fails.
    failing = MaxValueValidator(4)
    assert not failing(series)
    assert series.equals(failing.value)
    failing.clear()
    assert failing.value is None

    # Maximum of 5: every element qualifies.
    passing = MaxValueValidator(5)
    assert passing(series)
    assert series.equals(passing.value)
    passing.clear()
    assert passing.value is None
def test_calc_hash_for_multiple_sha256(self):
    """calc_hash maps each sha(...) expression to its own digest."""
    source = Series(["sha('1000')", "sha('2000')", "sha('3000')"])
    want = Series([
        '40510175845988F13F6162ED8526F0B09F73384467FA855E1E79B44A56562A58',
        '81A83544CF93C245178CBC1620030F1123F435AF867C79D87135983C52AB39D9',
        'A176EEB31E601C3877C87C2843A2F584968975269E369D5C86788B4C2F92D2A2'
    ])
    assert want.equals(self.dbt_test_utils.calc_hash(source))
def test_calc_hash_for_multiple_md5(self):
    """calc_hash maps each md5(...) expression to its own digest."""
    source = Series(["md5('1000')", "md5('2000')", "md5('3000')"])
    want = Series([
        'A9B7BA70783B617E9998DC4DD82EB3C5',
        '08F90C1A417155361A5C4B8D297E0D78',
        'E93028BDC1AACDFB3687181F2031765D'
    ])
    assert want.equals(self.dbt_test_utils.calc_hash(source))
def test_max_length_validator():
    """MaxLengthValidator flags over-long strings and records the input.

    Fix: replace PEP 8 E712/E711 comparisons (``== False``, ``== True``,
    ``== None``) with truth-value asserts and ``is None`` — ``value == None``
    would compare elementwise if ``value`` ever held a Series.
    """
    # Limit of 3: both strings are longer, so validation fails.
    series_1 = Series(["abcdef", "abcd"], dtype='str')
    too_strict = MaxLengthValidator(max_length=3)
    assert not too_strict(series_1)
    assert series_1.equals(too_strict.value)
    too_strict.clear()
    assert too_strict.value is None

    # Limit of 10: both strings fit.
    series_2 = Series(["abcdef", "abcd"], dtype='str')
    permissive = MaxLengthValidator(max_length=10)
    assert permissive(series_2)
    assert series_2.equals(permissive.value)
    permissive.clear()
    assert permissive.value is None
def test_pandas_series():
    """Series survive a dumps/loads round trip, with and without primitives."""
    # Full round trip: the decoded object is again an equal Series.
    for name, col in COLUMNS.items():
        original = Series(data=col, name=name)
        restored = loads(dumps(original, allow_nan=True))
        assert original.equals(restored)
        assert original.dtype == restored.dtype
    # primitives=True decodes to a plain dict of index and data arrays.
    for name, col in COLUMNS.items():
        original = Series(data=col, name=name)
        restored = loads(dumps(original, primitives=True, allow_nan=True))
        assert isinstance(restored, dict)
        assert_equal(original.index.values, restored['index'])
        assert_equal(original.values, restored['data'])
def test_constructor_copy(self): # GH15125 # test dtype parameter has no side effects on copy=True for data in [[1.0], np.array([1.0])]: x = Series(data) y = pd.Series(x, copy=True, dtype=float) # copy=True maintains original data in Series tm.assert_series_equal(x, y) # changes to origin of copy does not affect the copy x[0] = 2.0 assert not x.equals(y) assert x[0] == 2.0 assert y[0] == 1.0
def test_constructor_copy(self): # GH15125 # test dtype parameter has no side effects on copy=True for data in [[1.], np.array([1.])]: x = Series(data) y = pd.Series(x, copy=True, dtype=float) # copy=True maintains original data in Series tm.assert_series_equal(x, y) # changes to origin of copy does not affect the copy x[0] = 2. assert not x.equals(y) assert x[0] == 2. assert y[0] == 1.
def is_included_in_repr(d: pd.Series, train_data: pd.DataFrame):
    """Return the training rows whose similarity with ``d`` reproduces ``d``.

    Scans ``train_data`` (positive and negative training examples) and keeps
    every row whose feature representation equals ``d``.

    Returns a ``pd.DataFrame`` of the matching rows, or ``None`` when no row
    matches.
    """
    matches = [
        row
        for _, row in train_data.iterrows()
        if d.equals(similarity(row, d))
    ]
    return pd.DataFrame(matches) if matches else None
def check_order_session(df_group: DataFrame, logger: logging.Logger, task_id: str = None):
    """Validate that start/stop log entries of one group alternate correctly.

    Inspects one group of log rows (``df_group``, all sharing one ``date``)
    and reports, via ``logger.error``, whichever of these problems it finds:
      1. fewer start than stop entries (a start entry is missing),
      2. more start than stop entries (a stop entry is missing),
      3. equal counts but wrong ordering — a healthy log begins with a start
         entry and strictly alternates start/stop.

    :param df_group: rows of a single date (and optionally a single task);
        assumed non-empty — ``iloc[0]`` would raise otherwise (TODO confirm
        callers guarantee this)
    :param logger: logger that receives the error messages
    :param task_id: when given, task-level messages are emitted instead of
        session-level ones
    """
    mask_start = df_group[wc.COL_TYPE] == wc.TOKEN_START
    mask_stop = df_group[wc.COL_TYPE] == wc.TOKEN_STOP
    # Roll start_mask series n -> n+1, but keep the index the same.
    # This can be used to later compare if both, the shifted start_mask series
    # and the mask_stop series are the same, which must be the case if both
    # only contain alternating values as it should be in a healthy log.
    shifted_mask_start = Series(np.roll(mask_start.values, 1), index=mask_start.index)
    # All rows of the group carry the same date; only used in error messages.
    date = df_group.date.iloc[0]
    if mask_start.sum() < mask_stop.sum():
        # More stops than starts: a start entry is missing.
        if task_id is None:
            logger.error(
                ErrMsg.MISSING_SESSION_ENTRY.value.format(type=wc.TOKEN_START, date=date))
        else:
            logger.error(
                ErrMsg.MISSING_TASK_ENTRY.value.format(type=wc.TOKEN_START, date=date, task_id=task_id))
    elif mask_start.sum() > mask_stop.sum():
        # More starts than stops: a stop entry is missing.
        if task_id is None:
            logger.error(
                ErrMsg.MISSING_SESSION_ENTRY.value.format(type=wc.TOKEN_STOP, date=date))
        else:
            logger.error(
                ErrMsg.MISSING_TASK_ENTRY.value.format(type=wc.TOKEN_STOP, date=date, task_id=task_id))
    # First compare if the first entry is a start entry and then see if both
    # the shifted_start_mask series and the mask_stop series have the same
    # values. See above for an explanation.
    elif int(mask_start.iloc[0]) != 1 or not shifted_mask_start.equals(
            mask_stop):
        if task_id is None:
            logger.error(ErrMsg.WRONG_SESSION_ORDER.value.format(date=date))
        else:
            logger.error(
                ErrMsg.WRONG_TASK_ORDER.value.format(date=date, task_id=task_id))
def is_entry_new(entry: pd.Series) -> bool:
    """
    Determines if an entry has new information. For that, it must either:
    a. Be a new trip (trip name not in the database).
    b. Have at least one different value from the latest entry for that
       trip, and have a more recent date_retrieved than the existing one.

    Args:
        entry: The log entry to be compared.

    Returns:
        Whether it's new or not.
    """
    # NOTE: closure — relies on ``self`` and ``date_retrieved`` from the
    # enclosing scope.
    latest = self.read_latest_entry_for_trip(entry['Viagem'])
    if latest is None:
        # Unknown trip: everything about it is new.
        return True
    if entry.equals(latest.drop('date_retrieved')):
        return False
    # Changed values only count when this scrape is newer than the stored one.
    return latest['date_retrieved'].to_pydatetime() < date_retrieved
def test_counts(self):
    """counts() is identical whether or not null responses are present."""
    everything = Series({
        'apples': 3, 'bananas': 2, 'cherries': 1, 'dates': 0
    })
    only_apples = Series({'apples': 3})
    apples_bananas = Series({'apples': 3, 'bananas': 2})
    for question in (self.question, self.question_with_nulls):
        self.assertTrue(everything.equals(question.counts()))
        self.assertTrue(only_apples.equals(question.counts('apples')))
        self.assertTrue(apples_bananas.equals(
            question.counts(['apples', 'bananas'])
        ))
def test_equals(self):
    """Exercise Series/DataFrame.equals across dtypes, indexes and NAs."""
    s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
    s2 = s1.copy()
    assert s1.equals(s2)

    s1[1] = 99
    assert not s1.equals(s2)

    # NaNs compare as equal
    s1 = pd.Series([1, np.nan, 3, np.nan], index=[0, 2, 1, 3])
    s2 = s1.copy()
    assert s1.equals(s2)

    s2[0] = 9.9
    assert not s1.equals(s2)

    # Equality also holds with a MultiIndex and a missing value.
    idx = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")])
    s1 = Series([1, 2, np.nan], index=idx)
    s2 = s1.copy()
    assert s1.equals(s2)

    # Add object dtype column with nans
    index = np.random.random(10)
    df1 = DataFrame(np.random.random(10), index=index, columns=["floats"])
    df1["text"] = "the sky is so blue. we could use more chocolate.".split()
    df1["start"] = date_range("2000-1-1", periods=10, freq="T")
    df1["end"] = date_range("2000-1-1", periods=10, freq="D")
    df1["diff"] = df1["end"] - df1["start"]
    df1["bool"] = np.arange(10) % 3 == 0
    # NaN-out every other row across all columns.
    df1.loc[::2] = np.nan
    df2 = df1.copy()
    # Per-column equality for each dtype, then whole-frame equality.
    assert df1["text"].equals(df2["text"])
    assert df1["start"].equals(df2["start"])
    assert df1["end"].equals(df2["end"])
    assert df1["diff"].equals(df2["diff"])
    assert df1["bool"].equals(df2["bool"])
    assert df1.equals(df2)
    # A non-frame argument is simply unequal, not an error.
    assert not df1.equals(object)

    # different dtype
    different = df1.copy()
    different["floats"] = different["floats"].astype("float32")
    assert not df1.equals(different)

    # different index
    different_index = -index
    different = df2.set_index(different_index)
    assert not df1.equals(different)

    # different columns
    different = df2.copy()
    different.columns = df2.columns[::-1]
    assert not df1.equals(different)

    # DatetimeIndex
    index = pd.date_range("2000-1-1", periods=10, freq="T")
    df1 = df1.set_index(index)
    df2 = df1.copy()
    assert df1.equals(df2)

    # MultiIndex
    df3 = df1.set_index(["text"], append=True)
    df2 = df1.set_index(["text"], append=True)
    assert df3.equals(df2)

    df2 = df1.set_index(["floats"], append=True)
    assert not df3.equals(df2)

    # NaN in index
    df3 = df1.set_index(["floats"], append=True)
    df2 = df1.set_index(["floats"], append=True)
    assert df3.equals(df2)

    # GH 8437
    # Object-dtype series built via different setitem paths all compare equal.
    a = pd.Series([False, np.nan])
    b = pd.Series([False, np.nan])
    c = pd.Series(index=range(2), dtype=object)
    d = c.copy()
    e = c.copy()
    f = c.copy()
    c[:-1] = d[:-1] = e[0] = f[0] = False
    assert a.equals(a)
    assert a.equals(b)
    assert a.equals(c)
    assert a.equals(d)
    assert a.equals(e)
    assert e.equals(f)
def test_combined_spID_single_series():
    """A lone ID series passes through combined_spID unchanged."""
    species = Series(['gen1 sp1', 'gen1 sp2', 'gen2 sp3'])
    assert combined_spID(species).equals(species)
def test_combined_spID_multiple_series():
    """Two aligned ID series combine element-wise into joined IDs."""
    genus = Series(['gen1', 'gen1', 'gen2', 'gen2'])
    species = Series(['sp1', 'sp1', 'sp2', 'sp3'])
    expected = Series(['gen1sp1', 'gen1sp1', 'gen2sp2', 'gen2sp3'])
    assert combined_spID(genus, species).equals(expected)
# Assorted NumPy/pandas exercises, modernized from Python 2 to Python 3:
# print statements -> print() calls, and the removed DataFrame.ix indexer
# replaced with its label-based successor .loc.

# Set difference equals the `-` operator (a, b are sets defined earlier).
print(a.difference(b) == a - b)

import numpy as np

a = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)])
print(a.shape)

import numpy as np

a = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)])
# a[[2]] fancy-indexes the third row as a 2-D array; its elements sum to 24.
print(a[[2]].sum())

from pandas import Series

sa = Series(['a', 'b', 'c'], index=[0, 1, 2])
sb = Series(['a', 'b', 'c'])
sc = Series(['a', 'c', 'b'])
print(sa.equals(sc))
print(sb.equals(sa))
# String repetition is element-wise; + concatenates after index alignment.
print(sa * 3 + sc * 2)

from pandas import Series, DataFrame

data = {'language': ['Java', 'PHP', 'Python', 'R', 'C#'],
        'year': [1995, 1995, 1991, 1993, 2000]}
frame = DataFrame(data)
frame['IDE'] = Series(['Intellij', 'Notepad', 'IPython', 'R studio', 'VS'])
# NOTE: `in` on a Series tests the *index* labels, not the values.
print('VS' in frame['IDE'])
# .ix was removed from pandas; on this default RangeIndex, .loc[2] selects
# the same row by label.
print(frame.loc[2])
# language    Python
# year          1991
# IDE        IPython
# Name: 2, dtype: object
def test_init_with_array(self):
    """Alpha passed at construction is exposed as a labelled Series."""
    want = Series({'α1': 0.4, 'α2': 0.3, 'α3': 0.2, 'α4': 0.1})
    self.assertTrue(want.equals(self.d_array.alpha))
def test_equals(self):
    """Exercise Series/DataFrame.equals across dtypes, indexes and NAs.

    NOTE(review): legacy code — ``self.assert_`` is the old unittest alias
    and ``DataFrame.ix`` / bare ``nan`` only exist on old Python/pandas
    versions; this block will not run on current releases.
    """
    s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
    s2 = s1.copy()
    self.assert_(s1.equals(s2))

    s1[1] = 99
    self.assert_(not s1.equals(s2))

    # NaNs compare as equal
    s1 = pd.Series([1, np.nan, 3, np.nan], index=[0, 2, 1, 3])
    s2 = s1.copy()
    self.assert_(s1.equals(s2))

    s2[0] = 9.9
    self.assert_(not s1.equals(s2))

    # Equality also holds with a MultiIndex and a missing value.
    idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')])
    s1 = Series([1, 2, np.nan], index=idx)
    s2 = s1.copy()
    self.assert_(s1.equals(s2))

    # Add object dtype column with nans
    index = np.random.random(10)
    df1 = DataFrame(np.random.random(10, ), index=index, columns=['floats'])
    df1['text'] = 'the sky is so blue. we could use more chocolate.'.split()
    df1['start'] = date_range('2000-1-1', periods=10, freq='T')
    df1['end'] = date_range('2000-1-1', periods=10, freq='D')
    df1['diff'] = df1['end'] - df1['start']
    df1['bool'] = (np.arange(10) % 3 == 0)
    # NaN-out every other row across all columns.
    df1.ix[::2] = nan
    df2 = df1.copy()
    # Per-column equality for each dtype, then whole-frame equality.
    self.assert_(df1['text'].equals(df2['text']))
    self.assert_(df1['start'].equals(df2['start']))
    self.assert_(df1['end'].equals(df2['end']))
    self.assert_(df1['diff'].equals(df2['diff']))
    self.assert_(df1['bool'].equals(df2['bool']))
    self.assert_(df1.equals(df2))
    # A non-frame argument is simply unequal, not an error.
    self.assert_(not df1.equals(object))

    # different dtype
    different = df1.copy()
    different['floats'] = different['floats'].astype('float32')
    self.assert_(not df1.equals(different))

    # different index
    different_index = -index
    different = df2.set_index(different_index)
    self.assert_(not df1.equals(different))

    # different columns
    different = df2.copy()
    different.columns = df2.columns[::-1]
    self.assert_(not df1.equals(different))

    # DatetimeIndex
    index = pd.date_range('2000-1-1', periods=10, freq='T')
    df1 = df1.set_index(index)
    df2 = df1.copy()
    self.assert_(df1.equals(df2))

    # MultiIndex
    df3 = df1.set_index(['text'], append=True)
    df2 = df1.set_index(['text'], append=True)
    self.assert_(df3.equals(df2))

    df2 = df1.set_index(['floats'], append=True)
    self.assert_(not df3.equals(df2))

    # NaN in index
    df3 = df1.set_index(['floats'], append=True)
    df2 = df1.set_index(['floats'], append=True)
    self.assert_(df3.equals(df2))
def test_equals(self):
    """Exercise Series/DataFrame.equals across dtypes, indexes and NAs.

    NOTE(review): uses the removed ``DataFrame.ix`` indexer and bare ``nan``;
    this version only runs on old pandas releases.
    """
    s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
    s2 = s1.copy()
    self.assertTrue(s1.equals(s2))

    s1[1] = 99
    self.assertFalse(s1.equals(s2))

    # NaNs compare as equal
    s1 = pd.Series([1, np.nan, 3, np.nan], index=[0, 2, 1, 3])
    s2 = s1.copy()
    self.assertTrue(s1.equals(s2))

    s2[0] = 9.9
    self.assertFalse(s1.equals(s2))

    # Equality also holds with a MultiIndex and a missing value.
    idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')])
    s1 = Series([1, 2, np.nan], index=idx)
    s2 = s1.copy()
    self.assertTrue(s1.equals(s2))

    # Add object dtype column with nans
    index = np.random.random(10)
    df1 = DataFrame(np.random.random(10,), index=index, columns=['floats'])
    df1['text'] = 'the sky is so blue. we could use more chocolate.'.split()
    df1['start'] = date_range('2000-1-1', periods=10, freq='T')
    df1['end'] = date_range('2000-1-1', periods=10, freq='D')
    df1['diff'] = df1['end'] - df1['start']
    df1['bool'] = (np.arange(10) % 3 == 0)
    # NaN-out every other row across all columns.
    df1.ix[::2] = nan
    df2 = df1.copy()
    # Per-column equality for each dtype, then whole-frame equality.
    self.assertTrue(df1['text'].equals(df2['text']))
    self.assertTrue(df1['start'].equals(df2['start']))
    self.assertTrue(df1['end'].equals(df2['end']))
    self.assertTrue(df1['diff'].equals(df2['diff']))
    self.assertTrue(df1['bool'].equals(df2['bool']))
    self.assertTrue(df1.equals(df2))
    # A non-frame argument is simply unequal, not an error.
    self.assertFalse(df1.equals(object))

    # different dtype
    different = df1.copy()
    different['floats'] = different['floats'].astype('float32')
    self.assertFalse(df1.equals(different))

    # different index
    different_index = -index
    different = df2.set_index(different_index)
    self.assertFalse(df1.equals(different))

    # different columns
    different = df2.copy()
    different.columns = df2.columns[::-1]
    self.assertFalse(df1.equals(different))

    # DatetimeIndex
    index = pd.date_range('2000-1-1', periods=10, freq='T')
    df1 = df1.set_index(index)
    df2 = df1.copy()
    self.assertTrue(df1.equals(df2))

    # MultiIndex
    df3 = df1.set_index(['text'], append=True)
    df2 = df1.set_index(['text'], append=True)
    self.assertTrue(df3.equals(df2))

    df2 = df1.set_index(['floats'], append=True)
    self.assertFalse(df3.equals(df2))

    # NaN in index
    df3 = df1.set_index(['floats'], append=True)
    df2 = df1.set_index(['floats'], append=True)
    self.assertTrue(df3.equals(df2))
def test_equals_list_array():
    # GH20676 Verify equals operator for list of Numpy arrays
    payload = np.array([1, 2])
    original = Series([payload, payload])
    assert original.equals(original.copy())