コード例 #1
0
def test_all_values():
    """Compare get_dummies output with hand-written indicator rows for an input that uses every allowed value."""
    columns = ["val1", "val2", "val3"]
    series = pd.Series(["val1", "val3", "val1", "val2"])

    # One indicator row per input element, in input order.
    indicator_rows = [
        (1, 0, 0),  # "val1"
        (0, 0, 1),  # "val3"
        (1, 0, 0),  # "val1"
        (0, 1, 0),  # "val2"
    ]
    wanted = pd.DataFrame(indicator_rows, columns=columns)

    result = get_dummies(series, columns)
    assert_frame_equal(wanted, result)
コード例 #2
0
def test_empty_input():
    """Compare get_dummies output for an empty series with an empty frame that has one column per allowed value."""
    columns = ["val1", "val2", "val3"]
    empty_series = pd.Series([])
    wanted = pd.DataFrame([], columns=columns)

    result = get_dummies(empty_series, columns)
    assert_frame_equal(wanted, result)
コード例 #3
0
def test_only_nan():
    """Compare get_dummies output for an all-NaN series with a frame in which every cell is NaN."""
    columns = ["val1", "val2", "val3"]
    row_count = 4

    series = pd.Series([np.nan] * row_count)
    # Every expected row is NaN in every column.
    nan_row = (np.nan,) * len(columns)
    wanted = pd.DataFrame([nan_row] * row_count, columns=columns)

    result = get_dummies(series, columns)
    assert_frame_equal(wanted, result)
コード例 #4
0
def make_house_type_features(db_connection):
    """
    Get information whether a house is a single-family, two-family, three-family, multi-family home or mixed
    use (residential + commercial)

    The home type is derived from the parcel's use code (the "class" column of the parcels table). Use codes
    that are not listed in the mapping below get no home type, and any missing values in the resulting
    indicator columns are replaced by 0.

    Input:
    db_connection: connection to postgres database. "set schema ..." must have been called on this connection
                   to select the correct schema from which to load inspections

    Output:
    A pandas dataframe, with one column per home type ("single-family", "two-family", "three-family",
    "multi-family", "mixed-use").
    NOTE(review): the query returns one row per row of parcels_inspections joined to a parcel; whether that
    is one row per parcel depends on parcel_id being unique in that table -- confirm.
    """

    query = ("SELECT inspections.parcel_id, parcels.class "
             "FROM parcels_inspections AS inspections "
             "JOIN shape_files.parcels_cincy AS parcels "
             "ON parcels.parcelid = inspections.parcel_id")

    df = pd.read_sql(query, con=db_connection)
    df = df.set_index("parcel_id")

    # The labels used in the mapping below must match these names exactly; a label outside this list
    # (the mapping previously said "mixed-used") can never populate its indicator column.
    home_types = ["single-family", "two-family", "three-family", "multi-family", "mixed-use"]

    # map use code to type of home
    use_codes = {423: "mixed-use",
                 510: "single-family",
                 520: "two-family",
                 530: "three-family",
                 550: "multi-family",
                 554: "multi-family",
                 552: "multi-family",
                 599: "multi-family"}
    # use codes without an entry in the mapping become NaN
    df["type"] = df["class"].apply(lambda cl: use_codes.get(cl, np.nan))

    df = util.get_dummies(df["type"], possible_values=home_types)
    df = df.fillna(0)

    return df
コード例 #5
0
def test_illegal_value():
    """Call get_dummies on a series containing "val4", which is not among the allowed values."""
    # NOTE(review): nothing is asserted in this body; presumably an exception is expected and
    # checked by a decorator or wrapper that is not visible in this excerpt -- confirm.
    allowed = ["val1", "val2", "val3"]
    series = pd.Series(["val1", "val3", "val1", "val4"])

    get_dummies(series, allowed)