Example #1
0
        cases = np.diff(cases.sum(axis=0, keepdims=True), axis=-1)
        # nearest country to this one based on trajectory
        label = knn.predict(cases)
        
        if val not in predictions:
            predictions[val] = {}
        predictions[val][_dist] = label.tolist()

with open('results/knn_diff.json', 'w') as f:
    json.dump(predictions, f, indent=4)
'''

# ---- Find its 30 geographic neighbors ----
# ---- Then perform knn and predict ----
# Build one feature/label entry per country. `features` and `targets` are
# appended to below, so they must already exist (e.g. as lists) before this
# point; their creation is not shown in this part of the file.
for val in np.unique(confirmed["Country/Region"]):
    df = data.filter_by_attribute(confirmed, "Country/Region", val)
    # When a country has two or more rows, keep only the row holding the
    # largest value in the '5/19/20' column.
    # NOTE(review): presumably this selects the row with the highest count on
    # that date among the country's provinces -- confirm against the dataset.
    if df.shape[0] >= 2:
        # Index label of the row with the maximum '5/19/20' value.
        whole_data = df['5/19/20'].idxmax(axis=0)
        # Indexing with a one-element list (assuming df is a pandas DataFrame)
        # keeps a one-row frame rather than collapsing to a Series.
        df = df.loc[[whole_data]]
    cases, labels = data.get_cases_chronologically(df)
    features.append(cases)
    targets.append(labels)

# Join the per-country pieces along the first axis.
features = np.concatenate(features, axis=0)
targets = np.concatenate(targets, axis=0)
predictions = {}

# Every label column from index 2 onward is handed to the KD-tree.
# NOTE(review): presumably these columns are latitude/longitude -- confirm the
# column layout returned by data.get_cases_chronologically.
targets_geo_location = targets[:, 2:]
mykd = scipy.spatial.KDTree(targets_geo_location)

for _dist in ['minkowski', 'manhattan']:
Example #2
0
NUM_COLORS = 58
LINE_STYLES = ['solid', 'dashed', 'dotted']
NUM_STYLES = len(LINE_STYLES)

# Load the US confirmed-cases time series. `confirmed` first holds the CSV
# path and is then rebound to the loaded table.
confirmed = os.path.join(
    BASE_PATH,
    'csse_covid_19_time_series',
    'time_series_covid19_confirmed_US.csv')
confirmed = data.load_csv_data(confirmed)
features = np.array([])
targets = np.array([])

# Province_State values that are skipped entirely.
EXCLUDED_REGIONS = ('Diamond Princess', 'Grand Princess')

# extract each state's case progression
# The exclusion is checked before anything is recorded, so every row of
# `features` has exactly one matching entry in `targets`, whatever order the
# regions arrive in. Rows are gathered in a list and stacked once at the end,
# which also guarantees a 2-D result when only a single state is present.
state_curves = []
for val in np.unique(confirmed["Province_State"]):
    if val in EXCLUDED_REGIONS:
        continue
    df = data.filter_by_attribute(
        confirmed, "Province_State", val)
    cases, labels = data.get_cases_chronologically_US(df)
    # Collapse the state's rows into one curve by summing over axis 0.
    cases = cases.sum(axis=0)
    state_curves.append(cases)
    # np.append flattens its inputs; labels[0] is the first label entry
    # returned for this state.
    targets = np.append(targets, labels[0])

# With no usable state, `features` stays the empty array created above.
if state_curves:
    features = np.vstack(state_curves)

# Smooth every STATE progression curve; the second derivative is meant to be
# computed from these smoothed curves.
# The idea of using a Savitzky-Golay filter comes from this stackoverflow page:
# https://stackoverflow.com/questions/20618804/how-to-smooth-a-curve-in-the-right-way
sg_state_cases = np.zeros(features.shape, dtype=float)
for i, state_curve in enumerate(features):
    # Window of 51 samples, cubic polynomial fit.
    sg_state_cases[i, :] = savgol_filter(
        state_curve, window_length=51, polyorder=3)
Example #3
0
# Load the global deaths time series.
death = data.load_csv_data(
    os.path.join(BASE_PATH, 'csse_covid_19_time_series',
                 'time_series_covid19_deaths_global.csv'))
features, targets = [], []

# One figure (figsize 12 by 12) with a single axes; colours come from 'jet'.
fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(111)
cm = plt.get_cmap('jet')
LINE_STYLES = ['solid', 'dashed', 'dotted']
NUM_STYLES = len(LINE_STYLES)

# First pass: count the countries whose summed series exceeds MIN_CASES, so
# the colour map can be sampled at exactly that many points.
NUM_COLORS = 0
for val in np.unique(death["Country/Region"]):
    cases, labels = data.get_cases_chronologically(
        data.filter_by_attribute(death, "Country/Region", val))
    cases = cases.sum(axis=0)
    if not cases.sum() > MIN_CASES:
        continue
    NUM_COLORS += 1

# NUM_COLORS evenly spaced samples of the colour map over [0, 1].
colors = list(map(cm, np.linspace(0, 1, NUM_COLORS)))
legend, handles = [], []

# Second pass: rebuild each country's series, summed over axis 0.
for val in np.unique(death["Country/Region"]):
    df = data.filter_by_attribute(death, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    cases = cases.sum(axis=0)
# Load the US confirmed and US deaths time series (confirmed first).
confirmed = data.load_csv_data(
    os.path.join(BASE_PATH, 'csse_covid_19_time_series',
                 'time_series_covid19_confirmed_US.csv'))
deaths = data.load_csv_data(
    os.path.join(BASE_PATH, 'csse_covid_19_time_series',
                 'time_series_covid19_deaths_US.csv'))

state_dict = {}  # integer id -> Province_State value
features_deaths = []
targets_deaths = []

# Give every Province_State value a consecutive integer id and label each of
# its rows with that id.
# NOTE(review): another fragment in this file reads US data through
# data.get_cases_chronologically_US -- confirm which helper is intended here.
for i, val in enumerate(np.unique(deaths["Province_State"])):
    state_dict[i] = val
    df = data.filter_by_attribute(deaths, "Province_State", val)

    cases, labels = data.get_cases_chronologically(df)
    label = i

    # One float copy of the id per row of `labels`.
    new_labels = np.full(labels.shape[0], float(i))

    features_deaths.append(cases)
    targets_deaths.append(new_labels)

# Leave the counter at the number of states processed (0 when there are none).
i = len(state_dict)

features_deaths = np.concatenate(features_deaths, axis=0)
targets_deaths = np.concatenate(targets_deaths, axis=0)

# `unique` is the (ids, counts) pair; `small_values` holds the positions of
# the ids that occur five times or fewer.
unique = np.unique(targets_deaths, return_counts=True)
small_values = np.where(unique[1] <= 5)
# Load the global recovered time series.
recovered = data.load_csv_data(
    os.path.join(BASE_PATH, 'csse_covid_19_time_series',
                 'time_series_covid19_recovered_global.csv'))
features, targets = [], []

# One figure (figsize 12 by 12) with a single axes; colours come from 'jet'.
fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(111)
cm = plt.get_cmap('jet')
LINE_STYLES = ['solid', 'dashed', 'dotted']
NUM_STYLES = len(LINE_STYLES)

# First pass: count the countries whose summed series exceeds MIN_CASES, so
# the colour map can be sampled at exactly that many points.
NUM_COLORS = 0
for val in np.unique(recovered["Country/Region"]):
    cases, labels = data.get_cases_chronologically(
        data.filter_by_attribute(recovered, "Country/Region", val))
    cases = cases.sum(axis=0)
    if not cases.sum() > MIN_CASES:
        continue
    NUM_COLORS += 1

# NUM_COLORS evenly spaced samples of the colour map over [0, 1].
colors = list(map(cm, np.linspace(0, 1, NUM_COLORS)))
legend, handles = [], []

# Second pass: rebuild each country's series, summed over axis 0.
for val in np.unique(recovered["Country/Region"]):
    df = data.filter_by_attribute(recovered, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    cases = cases.sum(axis=0)