def test_json_conversion_strings_with_leading_zeros():
    """Round-tripping through JSON must keep leading zeros in string ids."""
    ids = np.array(['000000000' + str(i) for i in range(10)])
    original = DDF({'runner_id': ids})
    restored = DDF.from_json(original.to_json())
    restored = restored.colslice(original)
    assert restored.equals(original)
def test_concatenate_df_with_non_overlapping_cols():
    """Row-wise append pads columns missing from one frame with NaN."""
    left = DDF({'col1': np.arange(4), 'col2': np.arange(4)})
    right = DDF({'col1': np.arange(2)})
    stacked = left.append(right, axis=0)
    np.testing.assert_array_equal(stacked['col1'], np.array([0, 1, 2, 3, 0, 1]))
    np.testing.assert_array_equal(
        stacked['col2'], np.array([0, 1, 2, 3, np.nan, np.nan]))
def test_merge_unicode_col():
    # Regression test: merging on a key column whose dtype is unicode in one
    # frame and byte-string in the other must not raise, even when the
    # unicode values contain non-ASCII characters.
    # NOTE(review): relies on the Python 2 `unicode` builtin -- this module
    # appears to target Python 2; confirm before porting to Python 3.
    bad_df = DDF(DATA)
    bad_str = u'hurac\xe1n'
    bad_df['name'] = np.array([bad_str]*N_EXAMPLES).astype(unicode)
    good_df = DDF(DATA)
    good_str = u'namey_mc_name'
    good_df['name'] = np.array([good_str]*N_EXAMPLES).astype(str)
    # Only checks that the merge completes without an encoding error.
    bad_df.merge(good_df, on=['group_id', 'name'], how='left')
def append_csv(data, path):
    """Append `data` (a DDF, or anything DDF accepts) to the CSV at `path`,
    creating the file if it does not exist yet."""
    assert path.endswith('.csv')
    new_rows = data if isinstance(data, DDF) else DDF(data)
    if not os.path.isfile(path):
        combined = new_rows
    else:
        combined = DDF.from_csv(path).append(new_rows, axis=0)
    combined.to_csv(path)
def test_get_buildings_in_certain_direction():
    """Only buildings whose [phi_min, phi_max] span contains the query angle
    are returned."""
    candidates = DDF({
        'phi_min': [2, 2.1, 1.5, 0.05],
        'phi_max': [2.1, 2.2, 2.15, 6.18],
    })
    selected = daylight.get_buildings_in_direction(2.05, candidates)
    wanted = DDF({
        'phi_min': [2, 1.5],
        'phi_max': [2.1, 2.15],
    })
    assert selected.equals(wanted)
def test_get_buildings_in_certain_direction_around_zero():
    # The query angle 0.01 lies outside [0.05, 6.18] numerically, yet the
    # building is still expected in the result -- presumably wrap-around
    # handling near 0 / 2*pi; see `daylight.get_buildings_in_direction`.
    building = DDF({
        'phi_min': [0.05],
        'phi_max': [6.18],
    })
    result = daylight.get_buildings_in_direction(0.01, building)
    wanted = DDF({
        'phi_min': [0.05],
        'phi_max': [6.18],
    })
    assert result.equals(wanted)
def test_equals_invariant_to_column_order():
    """DDF.equals must not depend on column insertion order."""
    first = DDF(OrderedDict([
        ('col1', np.array([1, 2])),
        ('col2', np.array([2, 1])),
    ]))
    second = DDF(OrderedDict([
        ('col2', np.array([2, 1])),
        ('col1', np.array([1, 2])),
    ]))
    assert first.equals(second)
def test_json_conversion_datetimes():
    """datetime64[ns] columns must survive a JSON round trip."""
    days = [
        '2013-03-30', '2014-02-10', '2014-05-12', '2014-03-17',
        '2013-04-12', '2014-05-19', '2014-04-09', '2014-05-11',
        '2014-02-25', '2014-04-24', '2014-05-30', '2014-02-09',
        '2014-05-05',
    ]
    dates = np.array(
        [day + 'T00:00:00.000000000' for day in days], dtype='<M8[ns]')
    original = DDF({'time': dates})
    restored = DDF.from_json(original.to_json())
    restored = restored.colslice(original)
    assert restored.equals(original)
def test_add_height_relative_to_others():
    # Expected 'relative_height': when a building is alone in its city it
    # equals its own story count; otherwise it appears to be story count
    # minus the surrounding average -- confirm against `combined`.
    base = {
        'number_of_stories': [5, 2, 1, 5, 3],
        'average_surrounding_building_heights': [5, 6, 1, 5, 1],
        'number_of_buildings_in_city': [1, 2, 1, 1, 2],
    }
    frame = DDF(dict(base))
    expected_data = dict(base)
    expected_data['relative_height'] = [5, -4, 1, 5, 2]
    expected = DDF(expected_data)
    output = combined.add_height_relative_to_others(frame)
    assert expected.equals(output)
def test_json_conversion_preserves_string_dtypes():
    """Fixed-width byte-string dtypes (S1/S7/S) survive a JSON round trip."""
    source = DDF({
        'runner_id': np.array(['test', 'testing'], dtype='S7'),
        'short_strings': np.array(['a', 'b'], dtype='S1'),
        'more_strings': np.array(['abc', 'def'], dtype='S')
    })
    round_tripped = DDF.from_json(source.to_json())
    round_tripped = round_tripped.colslice(source)
    assert round_tripped.equals(source)
    for column in ('runner_id', 'short_strings', 'more_strings'):
        assert round_tripped[column].dtype == source[column].dtype
def test_concatenate():
    """concatenate() stacks frames row-wise and keeps column dtypes."""
    base = DDF({'a': np.repeat('A', 10), 'b': np.repeat(10, 10)})
    stacked = concatenate([base] * 100)
    assert stacked.shape == (1000, 2)
    for column in ('a', 'b'):
        assert stacked[column].dtype == base[column].dtype
def get_buildings_polar_coords(lat, lon, buildings):
    """Build a DDF of polar coordinates, plus each building's height, as seen
    from (lat, lon).

    `_drop_itself` presumably removes the building at the query point from
    the result -- confirm against its definition.
    """
    rows = []
    for building in buildings:
        coords = get_polar_coords(lat, lon, building)
        coords['height'] = building['properties']['height']
        rows.append(coords)
    return _drop_itself(DDF(rows))
def test_to_json(df):
    """to_json rejects non-string column names; after renaming, the frame
    survives a JSON round trip (sorted to normalize row order)."""
    with pytest.raises(AssertionError):
        df.to_json()
    renamed = df.rename(lambda col: str(col))
    renamed = renamed.sort('0')
    round_tripped = DDF.from_json(renamed.to_json())
    round_tripped = round_tripped.sort('0')
    assert round_tripped.equals(renamed)
def test_append_csv(tmpdir):
    """append_csv creates the file on the first call and appends on later
    calls.

    BUG FIX: the original built the path as ``str(tmpdir) + 'log.csv'``,
    which drops the separator and writes a *sibling* of tmpdir rather than a
    file inside it, defeating the fixture's cleanup/isolation (compare
    test_saving_to_csv_by_default_does_not_save_index, which joins with '/').
    """
    path = str(tmpdir) + '/log.csv'
    data = DDF({'a': 1, 'b': 10})
    append_csv(data, path)
    loaded = DDF.from_csv(path)
    assert loaded.equals(data)
    new_data = DDF({'a': 100, 'b': 101})
    append_csv(new_data, path)
    loaded_appended = DDF.from_csv(path)
    appended = data.append(new_data)
    assert appended.equals(loaded_appended)
def test_thin_by_id_with_offset():
    """With offset=1, thinning by 2 keeps the second of every two id
    groups."""
    frame = DDF({'race_id': [5, 5, 5, 6, 6, 3, 3, 3, 4, 4, 1, 1]})
    thinned = thin_by_id(frame, 'race_id', 2, offset=1)
    assert np.all(thinned['race_id'] == np.array([6, 6, 4, 4]))
def test_thin_by_id_retains_df_order():
    """Surviving rows keep their original order after thinning."""
    frame = DDF({'race_id': [5, 5, 5, 6, 6, 3, 3, 3, 4, 4, 1, 1]})
    thinned = thin_by_id(frame, 'race_id', 2)
    assert np.all(thinned['race_id'] == np.array([5, 5, 5, 3, 3, 3, 1, 1]))
def test_clean_rent():
    """clean_rent strips '$' and unit suffixes and maps '-' to NaN."""
    raw = DDF({'rent': ['$26.57/fs', '-', '$24.92/+util']})
    cleaned = datasets.clean_rent(raw)
    wanted = DDF({'rent': [26.57, np.nan, 24.92]})
    assert wanted.equals(cleaned)
def get_test_set():
    """Load and return the test-set DDF from `test_data_path`.

    BUG FIX: the original loaded the frame but never returned it, so every
    caller received None (contrast `_get_small_test_set`, which returns its
    frame).
    """
    return DDF.from_hd5(test_data_path)
def test_drop_rows_with_missing_rent():
    # NOTE(review): 550.35 is not NaN yet is expected to be dropped -- the
    # function apparently filters more than missing values (an outlier cut?);
    # confirm against `datasets.drop_rows_with_missing_rent`.
    raw = DDF({'rent': [26.57, np.nan, 24.92, 550.35]})
    output = datasets.drop_rows_with_missing_rent(raw)
    wanted = DDF({'rent': [26.57, 24.92]})
    assert wanted.equals(output)
def test_can_merge_two_ddfs():
    """An outer merge keeps every key of the larger frame and NaN-pads the
    columns that exist only in the smaller one."""
    big = DDF({'col1': np.arange(4), 'col2': np.arange(4)})
    small = DDF({'col1': np.arange(2), 'col3': np.arange(2)})
    merged = small.merge(big, on='col1', how='outer')
    assert np.allclose(merged['col2'], big['col2'])
    assert utils.nan_allclose(merged['col3'], np.array([0, 1, np.nan, np.nan]))
def test_saving_to_csv_by_default_does_not_save_index(tmpdir):
    """to_csv must not write an index column unless asked to."""
    target = str(tmpdir) + '/saved_df.csv'
    frame = DDF({'col': np.arange(5)})
    frame.to_csv(target)
    reloaded = DDF.from_csv(target)
    # pandas-style index columns come back named 'Unnamed: 0'.
    assert 'Unnamed: 0' not in reloaded
def _get_small_test_set():
    """Load and return the small test-set DDF from disk."""
    return DDF.from_hd5(small_test_set_path)
def test_ddf_init():
    """Constructing from a list of row dicts stores columns as ndarrays."""
    frame = DDF([{'a': 1}])
    assert isinstance(frame.data['a'], np.ndarray)
def test_equals_returns_false_when_columns_are_not_the_same():
    """Frames with different column names are never equal, even when the
    values match."""
    left = DDF({'col1': np.array([1, 2])})
    right = DDF({'col2': np.array([1, 2])})
    assert not left.equals(right)
def concatenate(dfs):
    """Row-wise concatenate a non-empty sequence of DDFs.

    Columns are taken from ``dfs[0]``; every frame is assumed to provide the
    same columns. Each column is stacked with ``np.concatenate`` in the order
    the frames are given.

    Raises:
        IndexError: if ``dfs`` is empty.
    """
    # Dict comprehension replaces the original build-up loop (same behavior).
    return DDF({
        col: np.concatenate([df[col] for df in dfs])
        for col in dfs[0]
    })
def test_appending_empty_df(df):
    """Appending an empty DDF must not raise."""
    empty = DDF()
    df.append(empty)
def test_ddf_repr():
    """repr() of an empty DDF must not raise."""
    repr(DDF())
def test_merge_preservers_strings():
    """Merged string columns keep a numpy str_ dtype (no object downcast)."""
    left = DDF({'a': np.arange(4), 'b': np.repeat('b', 4),
                'd': np.repeat('r', 4)})
    right = DDF({'a': np.arange(4), 'c': np.repeat('c', 4),
                 'd': np.repeat('2r', 4)})
    merged = left.merge(right, on='a')
    for column in ('b', 'c'):
        assert merged[column].dtype.type is np.str_
def get_small_df():
    """Return a 10-row toy frame: 'col1' is 0..9 and 'target' is a binary
    label (five zeros followed by five ones)."""
    target = np.append(np.zeros(5), np.ones(5))
    return DDF({'col1': np.arange(10), 'target': target})
def test_add_column_which_is_shape_n_1():
    """A column of shape (n, 1) is accepted alongside shape-(n,) columns
    without raising."""
    flat = np.arange(10)
    tall = np.arange(10).reshape((-1, 1))
    DDF({'col': flat, 'new_col': tall})