Example #1
0
    def test_find_lat_long(self):
        """Print the ``load_city_lat_long()`` entry of every CityCountry value that has one.

        No assertion is made; the test only fails if one of the calls raises.
        """
        lookup = load_city_lat_long()

        # NOTE(review): load_data is passed uncalled here — confirm that
        # preprocess accepts the loader itself rather than its result.
        frame = preprocess(load_data)

        for name in frame.CityCountry.unique():
            if name not in lookup:
                continue
            print(lookup[name])
Example #2
0
    def test_load_data(self):
        """Sanity-check the preprocessed frame: temperature bounds and missing values."""
        # NOTE(review): load_data is passed uncalled here — confirm that
        # preprocess accepts the loader itself rather than its result.
        frame = preprocess(load_data)

        # No temperature may lie below -100 or above 200.
        self.assertEqual(
            frame.loc[frame.AvgTemperature < -100, "AvgTemperature"].count(), 0
        )
        self.assertEqual(
            frame.loc[frame.AvgTemperature > 200, "AvgTemperature"].count(), 0
        )

        # State and AvgTemperature are excluded from the null check; every
        # remaining column must be fully populated.
        required = frame.drop(columns=["State", "AvgTemperature"])
        incomplete = required[required.isnull().any(axis=1)]
        self.assertEqual(len(incomplete), 0, incomplete)

        # CityCountry must contain neither nulls nor empty strings.
        labels = frame["CityCountry"]
        self.assertFalse(labels.isnull().any().any())

        blank_positions = np.where(labels == "")[0]
        self.assertEqual(len(blank_positions), 0, blank_positions)
def positive_negative_samples_test(sample_index=2, num_variations=20):
    """Print similarity scores between one matrix and its modified copies.

    ``num_variations`` results of ``preprocess(sample_index)`` are collected.
    Every one except the first is handed to ``generate_positive``; its return
    value is ignored, so it is presumably expected to modify the matrix in
    place (TODO confirm). Each modified copy is then compared with the
    untouched first copy: both are converted to graphs, passed through
    ``eig(..., "eigvalsh")`` and the two results are scored with three
    metrics, one printed line per metric.

    Args:
        sample_index: Value forwarded to ``preprocess``. The original comment
            describes it as the sample number in the DD.mat dataset.
        num_variations: Total number of copies to build, including the
            unmodified reference copy. With fewer than two copies nothing is
            compared and nothing is printed.
    """
    # One preprocess() call per slot, exactly as before, so each variation is
    # modified independently of the others.
    matrices = [preprocess(sample_index) for _ in range(num_variations)]

    # Slot 0 stays untouched and serves as the reference.
    variations = matrices[1:]
    for matrix in variations:
        generate_positive(matrix)

    if not variations:
        return

    # The reference never changes, so its graph and eig() result are built
    # once instead of being recomputed for every comparison.
    # NOTE(review): from_numpy_matrix was removed in NetworkX 3.0; switch to
    # from_numpy_array if the project upgrades.
    reference = eig(nx.from_numpy_matrix(matrices[0]), "eigvalsh")

    for number, matrix in enumerate(variations, start=1):
        candidate = eig(nx.from_numpy_matrix(matrix), "eigvalsh")

        # Cosine similarity (1 - cosine distance).
        print('Variation {} - {}'.format(
            number, 1 - spatial.distance.cosine(reference, candidate)))
        print('Variation {} - {}'.format(
            number, distance.euclidean(reference, candidate)))
        # NOTE(review): if `distance` is scipy.spatial.distance, minkowski
        # defaults to p=2 and repeats the Euclidean value — confirm intent.
        print('Variation {} - {}'.format(
            number, distance.minkowski(reference, candidate)))
from process_data import preprocess, load_data
from render_charts import vis_data


def print_info(df):
    """Print a quick textual overview of the temperature frame.

    Output, in order: the column dtypes, the ``describe()`` statistics, the
    per-column null counts (``State`` excluded), the rows whose
    ``AvgTemperature`` is missing (``State`` excluded) and a one-sentence
    summary with the number of distinct cities and countries.

    Args:
        df: DataFrame providing at least the ``State``, ``AvgTemperature``,
            ``City`` and ``Country`` columns.
    """
    print(df.dtypes)

    try:
        summary = df.describe(datetime_is_numeric=True)
    except TypeError:
        # pandas 2.0 removed the keyword; datetime columns are always
        # summarised numerically there, so the plain call is equivalent.
        summary = df.describe()
    print(summary)

    without_state = df.drop(columns=["State"])
    print(without_state.isnull().sum())

    print(without_state[df["AvgTemperature"].isna()])

    city_count = len(df["City"].unique())
    country_count = len(df["Country"].unique())

    print(
        f"This data contains a list of daily average temperatures from {city_count} cities and {country_count} countries."
    )


if __name__ == "__main__":
    # Script entry point: load the data, preprocess it, then print the overview.
    raw = load_data()
    df = preprocess(raw)
    print_info(df)
Example #5
0
 def test_snapshot_match(self):
     """Snapshot the structure report and summary statistics of the preprocessed frame.

     ``DataFrame.info()`` prints its report and returns ``None``, so passing
     its return value to the snapshot only ever recorded ``None``. The report
     is captured through ``buf`` instead so the text itself is compared. The
     stored ``df_info`` snapshot has to be regenerated once after this change.
     """
     import io

     # NOTE(review): load_data is passed uncalled here — confirm that
     # preprocess accepts the loader itself rather than its result.
     df = preprocess(load_data)

     info_buffer = io.StringIO()
     # memory_usage=False drops the memory line from the report so the
     # snapshot depends only on the columns, dtypes and non-null counts.
     df.info(buf=info_buffer, memory_usage=False)
     self.assertMatchSnapshot(info_buffer.getvalue(), "df_info")

     try:
         summary = df.describe(datetime_is_numeric=True)
     except TypeError:
         # pandas 2.0 removed the keyword; datetime columns are always
         # summarised numerically there, so the plain call is equivalent.
         summary = df.describe()
     self.assertMatchSnapshot(summary, "df_describe")
Example #6
0
    def test_calc_monthly(self):
        """Smoke test: ``calc_monthly`` runs on the frame built for "Abilene, Texas, US"."""
        # NOTE(review): load_data is passed uncalled here — confirm that
        # preprocess accepts the loader itself rather than its result.
        frame = preprocess(load_data)

        # Nothing is asserted on the result; the test passes when no call raises.
        city_frame = build_city_df(frame, "Abilene, Texas, US", False)
        calc_monthly(city_frame)
Example #7
0
    def test_build_city_df(self):
        """``build_city_df`` raises LookupError for "test" and runs for "Abilene, Texas, US"."""
        # NOTE(review): load_data is passed uncalled here — confirm that
        # preprocess accepts the loader itself rather than its result.
        frame = preprocess(load_data)

        # The key "test" must be rejected with LookupError.
        with self.assertRaises(LookupError):
            build_city_df(frame, "test", False)

        # This key must be accepted; the returned frame is not inspected.
        build_city_df(frame, "Abilene, Texas, US", False)