def test_find_lat_long(self):
    """Print the lookup entry of every dataset city present in the lat/long table.

    No assertions are made: the test only fails if loading, preprocessing
    or the lookup itself raises.
    """
    lat_long_by_city = load_city_lat_long()
    frame = preprocess(load_data)

    # Lazily keep only the CityCountry keys that the table knows about.
    known_keys = (
        key for key in frame.CityCountry.unique() if key in lat_long_by_city
    )
    for key in known_keys:
        print(lat_long_by_city[key])
def test_load_data(self):
    """Sanity-check the preprocessed frame: temperature bounds and missing values."""
    frame = preprocess(load_data)

    # No temperature reading may fall below -100 or above 200.
    temperatures = frame.AvgTemperature
    below_floor = frame.loc[temperatures < -100, "AvgTemperature"]
    self.assertEqual(below_floor.count(), 0)
    above_ceiling = frame.loc[temperatures > 200, "AvgTemperature"]
    self.assertEqual(above_ceiling.count(), 0)

    # Every column other than State and AvgTemperature must be fully
    # populated; offending rows are passed as the failure message.
    required = frame.drop(columns=["State", "AvgTemperature"])
    incomplete_rows = required[required.isnull().any(axis=1)]
    self.assertEqual(len(incomplete_rows), 0, incomplete_rows)

    # CityCountry must contain neither nulls nor empty strings.
    city_keys = frame["CityCountry"]
    self.assertFalse(city_keys.isnull().any().any())
    blank_positions = np.where(city_keys == "")[0]
    self.assertEqual(len(blank_positions), 0, blank_positions)
def positive_negative_samples_test():
    """Compare one dataset sample against 19 perturbed copies of itself.

    The same sample (number 2 in the DD.mat dataset, per the original
    comment) is loaded 20 times through ``preprocess``. Every copy except
    the first is passed to ``generate_positive``. The untouched first copy
    is then compared with each variation and three figures are printed per
    variation: ``1 - cosine`` distance, euclidean distance and minkowski
    distance between the two ``eig(..., "eigvalsh")`` results.
    """
    sample_number = 2
    copies = 20

    # Load the same adjacency matrix `copies` times.
    matrices = [preprocess(sample_number) for _ in range(copies)]

    # Perturb every copy but the first; generate_positive is expected to
    # modify its argument (the return value is not used here).
    for matrix in matrices[1:]:
        generate_positive(matrix)

    reference_index = 0
    for variation in range(1, copies):
        reference_graph = nx.from_numpy_matrix(matrices[reference_index])
        variation_graph = nx.from_numpy_matrix(matrices[variation])

        # Spectra of both graphs (Laplacian eigenvalues, per the original
        # comment).
        reference_spectrum = eig(reference_graph, "eigvalsh")
        variation_spectrum = eig(variation_graph, "eigvalsh")

        cosine_similarity = 1 - spatial.distance.cosine(
            reference_spectrum, variation_spectrum
        )
        print('Variation {} - {}'.format(variation, cosine_similarity))
        print('Variation {} - {}'.format(
            variation,
            distance.euclidean(reference_spectrum, variation_spectrum)))
        print('Variation {} - {}'.format(
            variation,
            distance.minkowski(reference_spectrum, variation_spectrum)))
from process_data import preprocess, load_data
from render_charts import vis_data


def print_info(df):
    """Print a quick textual overview of the temperature frame *df*.

    Output, in order: column dtypes, ``describe()`` statistics, per-column
    null counts (State excluded), the rows whose AvgTemperature is missing
    (State excluded), and a one-line summary of how many distinct cities
    and countries the frame holds.
    """
    print(df.dtypes)
    print(df.describe(datetime_is_numeric=True))

    # State is left out of both null reports below.
    without_state = df.drop(columns=["State"])
    print(without_state.isnull().sum())
    print(without_state[df["AvgTemperature"].isna()])

    city_count = len(df["City"].unique())
    country_count = len(df["Country"].unique())
    print(
        f"This data contains a list of daily average temperatures from {city_count} cities and {country_count} countries."
    )


if __name__ == "__main__":
    # NOTE(review): the test methods in this file pass `load_data` uncalled
    # to preprocess, whereas this script passes its result; confirm which
    # form preprocess expects.
    df = preprocess(load_data())
    print_info(df)
def test_snapshot_match(self):
    """Compare the frame's ``info()`` text and ``describe()`` table with snapshots.

    ``DataFrame.info()`` writes its summary to a stream and returns
    ``None``, so snapshotting its return value would only ever record
    ``None``. The summary is therefore captured through the ``buf``
    argument and the captured text is what gets compared.

    NOTE(review): a "df_info" snapshot recorded before this change holds
    ``None`` and has to be regenerated once.
    """
    import io

    df = preprocess(load_data)

    # Capture the info() report instead of letting it go to stdout.
    info_buffer = io.StringIO()
    df.info(buf=info_buffer)
    self.assertMatchSnapshot(info_buffer.getvalue(), "df_info")

    self.assertMatchSnapshot(
        df.describe(datetime_is_numeric=True), "df_describe"
    )
def test_calc_monthly(self):
    """Smoke test: ``calc_monthly`` runs on the frame built for "Abilene, Texas, US"."""
    frame = preprocess(load_data)
    city_frame = build_city_df(frame, "Abilene, Texas, US", False)
    # No assertions: the test passes as long as nothing raises.
    calc_monthly(city_frame)
def test_build_city_df(self):
    """``build_city_df`` rejects an unknown key and accepts "Abilene, Texas, US"."""
    frame = preprocess(load_data)

    # An unknown city key must surface as a LookupError.
    with self.assertRaises(LookupError):
        build_city_df(frame, "test", False)

    # This key must build without raising; the result is not inspected.
    build_city_df(frame, "Abilene, Texas, US", False)