cases = np.diff(cases.sum(axis=0, keepdims=True), axis=-1) # nearest country to this one based on trajectory label = knn.predict(cases) if val not in predictions: predictions[val] = {} predictions[val][_dist] = label.tolist() with open('results/knn_diff.json', 'w') as f: json.dump(predictions, f, indent=4) ''' # ---- Find its 30 geographic neighbors ---- # ---- Then perform knn and predict ---- for val in np.unique(confirmed["Country/Region"]): df = data.filter_by_attribute(confirmed, "Country/Region", val) if df.shape[0] >= 2: whole_data = df['5/19/20'].idxmax(axis=0) df = df.loc[[whole_data]] cases, labels = data.get_cases_chronologically(df) features.append(cases) targets.append(labels) features = np.concatenate(features, axis=0) targets = np.concatenate(targets, axis=0) predictions = {} targets_geo_location = targets[:, 2:] mykd = scipy.spatial.KDTree(targets_geo_location) for _dist in ['minkowski', 'manhattan']:
# Plot-styling constants: 58 distinct colors, cycled through 3 line styles.
NUM_COLORS = 58
LINE_STYLES = ['solid', 'dashed', 'dotted']
NUM_STYLES = len(LINE_STYLES)

confirmed = os.path.join(
    BASE_PATH,
    'csse_covid_19_time_series',
    'time_series_covid19_confirmed_US.csv')
confirmed = data.load_csv_data(confirmed)

# Cruise ships are not states and would distort the per-state curves.
# FIX(review): the original only applied this exclusion on the non-seeding
# branch of the `features` accumulator, so an excluded name that sorted first
# would have entered `features` but not `targets`, misaligning the two. The
# exclusion is now applied uniformly before either accumulator is touched.
_EXCLUDED = ('Diamond Princess', 'Grand Princess')

# extract each state's case progression
feature_rows = []
targets = np.array([])
for val in np.unique(confirmed["Province_State"]):
    if val in _EXCLUDED:
        continue
    df = data.filter_by_attribute(confirmed, "Province_State", val)
    cases, labels = data.get_cases_chronologically_US(df)
    # Sum over counties to get one cumulative time series per state.
    feature_rows.append(cases.sum(axis=0))
    targets = np.append(targets, labels[0])
# Stack once at the end instead of np.vstack inside the loop (same result,
# avoids quadratic re-allocation).
features = np.vstack(feature_rows)

# Smooth STATE progression curves and compute second derivative
# Idea to use a Savitzky-Golay filter comes from this stackoverflow page:
# https://stackoverflow.com/questions/20618804/how-to-smooth-a-curve-in-the-right-way
sg_state_cases = np.zeros(features.shape)
for i in range(features.shape[0]):
    # window size 51, polynomial order 3
    sg_state_cases[i, :] = savgol_filter(features[i, :], 51, 3)
death = os.path.join(
    BASE_PATH,
    'csse_covid_19_time_series',
    'time_series_covid19_deaths_global.csv')
death = data.load_csv_data(death)
features = []
targets = []

fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(111)
cm = plt.get_cmap('jet')
# NUM_COLORS is counted below rather than hard-coded: one color per country
# whose total deaths exceed MIN_CASES.
NUM_COLORS = 0
LINE_STYLES = ['solid', 'dashed', 'dotted']
NUM_STYLES = len(LINE_STYLES)

# First pass: count how many countries clear the MIN_CASES threshold so the
# colormap can be divided evenly among exactly that many curves.
for val in np.unique(death["Country/Region"]):
    df = data.filter_by_attribute(death, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    # Sum over provinces/regions to get one series per country.
    cases = cases.sum(axis=0)
    if cases.sum() > MIN_CASES:
        NUM_COLORS += 1
colors = [cm(i) for i in np.linspace(0, 1, NUM_COLORS)]
legend = []
handles = []

# Second pass: re-derive each country's series for plotting. The loop body
# continues beyond the visible chunk (presumably the thresholding and the
# actual plt plotting — confirm in the full file).
for val in np.unique(death["Country/Region"]):
    df = data.filter_by_attribute(death, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    cases = cases.sum(axis=0)
confirmed = os.path.join(
    BASE_PATH,
    'csse_covid_19_time_series',
    'time_series_covid19_confirmed_US.csv')
deaths = os.path.join(
    BASE_PATH,
    'csse_covid_19_time_series',
    'time_series_covid19_deaths_US.csv')
confirmed = data.load_csv_data(confirmed)
deaths = data.load_csv_data(deaths)

# state_dict maps an integer class label -> state name; targets_deaths holds
# that integer label once per row (county) of the state's data.
state_dict = {}
features_deaths = []
targets_deaths = []
# FIX(review): replaced the hand-maintained `i` counter with enumerate and
# dropped the unused local `label = i`; `state_dict.update({i: val})` is now a
# plain item assignment. Behavior is unchanged.
# NOTE(review): this uses data.get_cases_chronologically on a US table while
# the US-specific helper get_cases_chronologically_US exists elsewhere in the
# file — confirm this is intentional.
for i, val in enumerate(np.unique(deaths["Province_State"])):
    state_dict[i] = val
    df = data.filter_by_attribute(deaths, "Province_State", val)
    cases, labels = data.get_cases_chronologically(df)
    features_deaths.append(cases)
    # One numeric label per row of this state's data.
    targets_deaths.append(np.ones(labels.shape[0]) * i)

features_deaths = np.concatenate(features_deaths, axis=0)
targets_deaths = np.concatenate(targets_deaths, axis=0)

# Identify under-represented classes: labels with 5 or fewer rows.
unique = np.unique(targets_deaths, return_counts=True)
small_values = np.where(unique[1] <= 5)
recovered = os.path.join(
    BASE_PATH,
    'csse_covid_19_time_series',
    'time_series_covid19_recovered_global.csv')
recovered = data.load_csv_data(recovered)
features = []
targets = []

fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(111)
cm = plt.get_cmap('jet')
# NUM_COLORS is counted below rather than hard-coded: one color per country
# whose total recoveries exceed MIN_CASES.
NUM_COLORS = 0
LINE_STYLES = ['solid', 'dashed', 'dotted']
NUM_STYLES = len(LINE_STYLES)

# First pass: count how many countries clear the MIN_CASES threshold so the
# colormap can be divided evenly among exactly that many curves.
for val in np.unique(recovered["Country/Region"]):
    df = data.filter_by_attribute(recovered, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    # Sum over provinces/regions to get one series per country.
    cases = cases.sum(axis=0)
    if cases.sum() > MIN_CASES:
        NUM_COLORS += 1
colors = [cm(i) for i in np.linspace(0, 1, NUM_COLORS)]
legend = []
handles = []

# Second pass: re-derive each country's series for plotting. The loop body
# continues beyond the visible chunk (presumably the thresholding and the
# actual plt plotting — confirm in the full file).
for val in np.unique(recovered["Country/Region"]):
    df = data.filter_by_attribute(recovered, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    cases = cases.sum(axis=0)