def run(doc):
    """Append the "Exercise 6.3" section (per-country K-Means grouping) to ``doc``.

    For every location in the data returned by ``getdata.acquire_data('all')``
    whose ``new_cases`` series holds more than 11 distinct values, the squared
    skewness and the kurtosis (from ``stat.series2datasetline``) and the three
    values returned by ``specplus.main`` are collected. Locations whose alpha
    is not strictly positive are left out. The table is then clustered once
    per parameter space by ``graphs.plot_k_means`` and finally written to the
    document as plain text.

    Args:
        doc: Report object; ``add_heading`` and ``add_paragraph`` are called
            on it and it is forwarded to ``graphs.plot_k_means``.
    """
    doc.add_heading("Exercise 6.3", level=2)
    doc.add_paragraph("")
    # Get list of countries. ``unique`` keeps first-appearance order, so the
    # table rows come out in the same order on every run (a ``set`` of strings
    # gives no such guarantee).
    df_all = getdata.acquire_data('all')
    country_list = df_all['location'].unique().tolist()
    column_list = [
        'skewness²', 'kurtosis', r'$\alpha$', r'$\beta$', r'$\beta_t$', 'name'
    ]
    # Rows are gathered in a plain list and turned into a DataFrame once;
    # ``DataFrame.append`` no longer exists in pandas 2.x.
    rows = []
    for location in country_list:
        df_owd = df_all[df_all['location'] == location]
        data = df_owd['new_cases'].to_list()
        # Skip series without enough distinct values to analyse.
        if len(set(data)) <= 11:
            continue
        ds = stat.series2datasetline(data)
        alpha, beta_t, beta = specplus.main(data)
        # Close any figures left open by the analysis call above.
        plt.close('all')
        if alpha > 0:
            rows.append([
                ds['skewness']**2, ds['kurtosis'], alpha, beta, beta_t,
                location
            ])
    df = pd.DataFrame(rows, columns=column_list)

    # Apply K-Means grouping:
    parameter_spaces = (['skewness²', 'kurtosis', r'$\beta$'],
                        ['skewness²', 'kurtosis', r'$\alpha$'])

    for parameters in parameter_spaces:
        kmeans_obj = graphs.plot_k_means(df, parameters, doc,
                                         "COVID-19 New Cases per Country")
        # ``any`` is required here: a bare generator expression is always
        # truthy, which would send both groupings to the "beta group" column.
        uses_beta = any("beta" in x for x in parameters)
        df["beta group" if uses_beta else "alpha group"] = kmeans_obj.labels_

    doc.add_paragraph(
        'Below is the data set created for most countries available in the data collected from'
        ' "Out world in Data" website. Only countries with a minimum data diversity were selected, by'
        'checking if the number of new cases had assumed at least 12 distinct values. This eliminates '
        'time series that are simply too short or that do not contain enough data richness to apply the'
        ' available tools. Also, countries with negative alpha were removed. The maximum silhouette '
        'coefficient method was employed to select the number of clusters, which ended up being 2. The '
        'main group has hte majority of countries, whereas high kurtose, high skeness and high alpha '
        'or beta, which apparently correlates with countries in early stages of the epidemic, where '
        'daily new cases are growing steadily rather than slowing down or retreating.'
    )

    # Write to document.
    doc.add_paragraph(df.to_string())
# Example #2
# 0
def run(countrylist, doc=PlaceHolderDoc()):
    """Add per-country charts relating test counts to case counts to ``doc``.

    Two figures are attempted for every country:

    * daily new tests against daily new cases, with a ``y=x`` guide line and
      a linear regression (``linregress``);
    * total tests against total cases, with a ``y=x`` guide line and a
      three-parameter fit of the module-level ``func`` (``curve_fit``).

    A figure is only handed to ``doc.add_fig()`` when all of its fitting and
    decoration steps succeed; otherwise it is skipped without comment.

    Args:
        countrylist: Iterable of country names. It is processed in sorted
            order; the caller's object is not modified.
        doc: Report object providing ``add_heading``, ``add_paragraph`` and
            ``add_fig``.
    """
    doc.add_heading("Regression Analysis for Tests and Cases", level=3)
    doc.add_paragraph("Not all countries have provided their daily and total test statistics, empty plots may be " +
                      "shown for this case.")
    # Iterate over a sorted copy so the caller's list is not reordered in place.
    for country in sorted(countrylist):
        doc.add_heading(country, level=4)
        df = getdata.acquire_data(country, acquire_tests=True)

        # Figure 1: daily tests versus daily cases, linear regression.
        plt.figure()
        plt.xlabel("New Daily Cases")
        plt.grid(True, which="both")
        plt.ylabel("New Daily Tests")
        plt.plot(df['new_cases'], df['new_tests'], 'o', label="Data")
        try:
            new_cases = df['new_cases'].to_list()
            new_tests = df['new_tests'].to_list()
            # The guide line y=x is drawn up to the smaller of the two maxima.
            limit = min(int(max(new_tests)), int(max(new_cases)))
            plt.plot(range(limit), range(limit), label="y=x", linestyle='--')
            slope, intercept, rvalue, _pvalue, stderr = linregress(new_cases, new_tests)
            # Prepend x=0 so the fitted line is drawn starting at the intercept.
            x_regression = [0] + new_cases
            y_regression = [intercept + slope * x for x in x_regression]
            plt.plot(x_regression, y_regression, label="regression")
            plt.title(country + "\n" + 'Regression: y = {:.4f} x + {:.4f}'.format(slope, intercept) + "\n" +
                      'Correlation Coefficient: {:.4f}  Standard Error: {:.4f}'.format(rvalue, stderr))
            plt.legend()
            plt.tight_layout()
            plt.draw()
            doc.add_fig()
        except Exception:
            # Best effort: when the data cannot be fitted or drawn the chart
            # is left out. KeyboardInterrupt/SystemExit are no longer
            # swallowed, unlike with a bare ``except``.
            pass

        # Figure 2: total tests versus total cases, fit of ``func``.
        plt.figure()
        plt.xlabel("Total Cases")
        plt.grid(True, which="both")
        plt.ylabel("Total Tests")
        plt.plot(df['total_cases'], df['total_tests'], 'o', label="Data")
        try:
            total_cases = df['total_cases'].to_list()
            total_tests = df['total_tests'].to_list()
            limit = min(int(max(total_tests)), int(max(total_cases)))
            plt.plot(range(limit), range(limit), label="y=x", linestyle='--')
            popt, pcov = curve_fit(func, total_cases, total_tests, p0=(1, 1e-6, 1))
            # Only the diagonal of the covariance matrix is reported.
            diag_cov = np.diag(pcov)
            y_exp = func(np.asarray(total_cases), popt[0], popt[1], popt[2])
            plt.plot(total_cases, y_exp, label='Regression')
            plt.title(
                country + "\n" + 'Regression: y = {:.4f} exp (-{:.4g} x) + {:.4f}'.format(popt[0], -popt[1], popt[2]) +
                "\n" + 'Fit covariances: {:.4f} | {:.4g} | {:.4f}'.format(diag_cov[0], diag_cov[1], diag_cov[2]))
            plt.legend()
            # Axis labels are already set, so tight_layout can account for them.
            plt.tight_layout()
            plt.draw()
            doc.add_fig()
        except Exception:
            # Same best-effort policy as for the first figure.
            pass
# Example #3
# 0
def run(doc):
    """Append the "Exercise 7" section (sub-sections 7.1 to 7.3) to ``doc``.

    * 7.1 / 7.2: for each of five signal generators a number of random series
      is produced, ``mfdfa.main`` is run on each one, and the squared skewness
      together with the returned ``'Psi'`` value is collected. The figure left
      open by the last analysis of each generator is added to the document,
      its first line is kept for a final comparison chart, and a K-Means
      chart is produced per generator and for all generators together.
    * 7.3: the same two quantities are computed for the ``new_cases`` series
      of every location in ``getdata.acquire_data('all')`` with more than 11
      distinct values, clustered with ``graphs.plot_k_means`` and printed.

    Args:
        doc: Report object providing ``add_heading``, ``add_paragraph`` and
            ``add_fig``; it is also forwarded to ``graphs.plot_k_means``.
    """
    # The section title sits one level above its "7.x" sub-sections.
    doc.add_heading("Exercise 7", level=2)
    doc.add_heading("Exercise 7.1", level=3)
    doc.add_paragraph(
        "The MDFDA files have been fully refactored for the purpose of this work, with many additional"
        + " parameters being computed and shown in charts, including" +
        r'$\Psi$')

    doc.add_heading("Exercise 7.2", level=3)
    doc.add_paragraph(
        "For each signal generator, only one singularity spectrum will be plotted, but the aggregated "
        + " statistics will be shown for the next item.")

    # Control the random seed so results are consistent between runs:
    np.random.seed(182745949)

    # Prepare iterations: one zero-argument callable per generator, each
    # drawing its own random parameters when called.
    functions = (
        lambda: grng.time_series(2**np.random.randint(6, 13), 1),
        lambda: colorednoise.powerlaw_psd_gaussian(
            np.random.uniform(0, 2), 8192),
        lambda: pmodel.pmodel(
            n_values=8192, p=np.random.uniform(0.18, 0.42), slope=0.4)[1],
        lambda: logis.logistic_series(
            np.random.uniform(3.85, 3.95), 0.5, 8192)[1],
        lambda: henon.henon_series(
            np.random.uniform(1.35, 1.4), np.random.uniform(0.21, 0.31),
            8192)[1],
    )
    full_names = ('Non Gaussian Random Generator', 'Colored Noise Generator',
                  'P-Model', 'Logistic Map', 'Henon Map')
    sizes = (80, 60, 60, 60, 60)
    columns = ['skewness²', r'$\Psi$', 'generator']

    generator_frames = []
    xd = []
    yd = []
    for func, full_name, size in zip(functions, full_names, sizes):
        doc.add_heading(full_name, level=3)
        # Rows are gathered in a list and converted once;
        # ``DataFrame.append`` no longer exists in pandas 2.x.
        rows = []
        for _ in range(size):
            # Generate a time series:
            data = func()
            # Drop figures from the previous iteration so that only the last
            # analysis of this generator is still open after the loop.
            plt.close('all')
            ret = mfdfa.main(data)
            rows.append([skew(data)**2, ret['Psi'], full_name])
        df_tmp = pd.DataFrame(rows, columns=columns)
        doc.add_fig()
        # Save data from the last chart:
        line = plt.gca().get_lines()[0]
        xd.append(line.get_xdata())
        yd.append(line.get_ydata())
        # Add K-means chart:
        graphs.plot_k_means(df_tmp, ['skewness²', r'$\Psi$'], doc, full_name)
        generator_frames.append(df_tmp)

    df_generators = pd.concat(generator_frames, ignore_index=True, sort=False)
    doc.add_heading("For all generators:", level=3)
    graphs.plot_k_means(df_generators, ['skewness²', r'$\Psi$'], doc,
                        "All series")

    plt.figure()
    for x, y, full_name in zip(xd, yd, full_names):
        plt.plot(x, y, 'o-', label=full_name)
    plt.title("Comparing Singularity Spectra")
    plt.xlabel(r'$\alpha$')
    plt.ylabel(r'$f(\alpha)$')
    plt.legend(loc='upper center',
               bbox_to_anchor=(0.5, -0.1),
               fancybox=True,
               shadow=True,
               ncol=3)
    plt.grid(True, which='both')
    plt.tight_layout()
    doc.add_heading("Comparing singularity Spectra", level=4)
    doc.add_paragraph(
        "Here we show a comparative example of the singularity spectrum for time series generated from "
        +
        "different methods, in particular, we see the much wider spectrum of the series generated with "
        +
        "P-Model, which is also quite symmetrical, while for the other models, the spectrum is "
        +
        "left-truncated, which indicates that the spectrum is insensitive to larger local fluctuations."
    )
    doc.add_paragraph(
        "The spectrum of the P-Model being the widest, also indicates a higher degree of "
        + "multifractality and data complexity.")
    doc.add_fig()

    # Next section:
    doc.add_heading("Exercise 7.3", level=3)
    doc.add_paragraph("")
    # Load country data. ``unique`` keeps first-appearance order, so the
    # printed table is ordered identically on every run.
    df_all = getdata.acquire_data('all')
    country_list = df_all['location'].unique().tolist()
    columns = ['skewness²', r'$\Psi$', 'Country']
    country_rows = []
    for location in country_list:
        df_owd = df_all[df_all['location'] == location]
        data = df_owd['new_cases'].to_list()
        # Skip series without enough distinct values to analyse.
        if len(set(data)) <= 11:
            continue
        ret = mfdfa.main(data)
        this_skew = skew(data)
        plt.close('all')
        # Rows with an undefined Psi or skewness cannot be clustered.
        if not np.isnan(ret['Psi']) and not np.isnan(this_skew):
            country_rows.append([this_skew**2, ret['Psi'], location])
    # Built in one go, so every country gets its own row index.
    df = pd.DataFrame(country_rows, columns=columns)
    kmeans_obj = graphs.plot_k_means(df, ['skewness²', r'$\Psi$'], doc,
                                     "New Cases of COVID-19 By Country")
    df['group'] = kmeans_obj.labels_
    doc.add_paragraph(
        "Now, we print the results for each country along with the grouping proposed:"
    )
    doc.add_paragraph(df.to_string())
# Example #4
# 0
def _finish_soc_figure(doc, title, legend_columns, legend_fontsize=None):
    """Decorate the current SOC figure (legend, grid, labels, title) and add it to ``doc``."""
    plt.legend(loc='center left',
               bbox_to_anchor=(1, 0.5),
               ncol=legend_columns,
               fontsize=legend_fontsize)
    plt.grid(True, which='both')
    plt.xlabel('log(ni)')
    plt.ylabel('log(Yi)')
    plt.title(title)
    plt.tight_layout()
    plt.draw()
    doc.add_fig()


def run(doc):
    """Append the "Exercise 9" section (sub-sections 9.1 and 9.2) to ``doc``.

    * 9.1: for the endogenous and the exogenous range of ``p``, fifty p-model
      series are generated and passed to ``addplot`` on a shared figure.
    * 9.2: the ``new_cases`` series of every location in
      ``getdata.acquire_data('all')`` is passed to ``addplot``; locations for
      which it raises are listed in the document. A second figure repeats
      the process with ``ymin=-20``.

    Args:
        doc: Report object providing ``add_heading``, ``add_paragraph`` and
            ``add_fig``.
    """
    doc.add_heading("Exercise 9", level=2)
    doc.add_heading("Exercise 9.1", level=3)
    doc.add_paragraph("Due to the low diversity of numbers contained in a p-model generated time series, the " +
                      "aggregation used by the soc.py algorithm often produces few points or empty bins, resulting in " +
                      "poor charts for this generator.")

    # Control the random seed so results are consistent between runs:
    np.random.seed(1827459459)

    for name, p in zip(('Endogenous', 'Exogenous'), ([0.32, 0.42], [0.18, 0.28])):
        # Name the series type actually being plotted, one level below the
        # "Exercise 9.1" heading.
        doc.add_heading("For " + name.lower() + " series:", level=4)
        plt.figure()
        for ii in range(50):
            data = pmodel.pmodel(n_values=8192, p=np.random.uniform(p[0], p[1]), slope=np.random.randint(1, 3))[1]
            addplot(data, str(ii))
        _finish_soc_figure(doc, "SOC for " + name + " Series", legend_columns=3)

    # Now for 9.2:
    doc.add_heading("Exercise 9.2", level=3)
    # Get list of countries. ``unique`` keeps first-appearance order, so the
    # legend and the exclusion list are ordered identically on every run.
    df_all = getdata.acquire_data('all')
    country_list = df_all['location'].unique().tolist()
    excluded = []
    plt.figure()
    for location in country_list:
        df_owd = df_all[df_all['location'] == location]
        data = df_owd['new_cases'].to_list()
        try:
            addplot(data, location)
        except Exception:
            # Remember the locations that could not be processed; they are
            # reported in the document below.
            excluded.append(location)

    _finish_soc_figure(doc,
                       "SOC for COVID-19 daily new cases data Series",
                       legend_columns=3,
                       legend_fontsize=6)
    doc.add_paragraph("A few countries were excluded from this analysis, because they crashed the soc.py script, " +
                      "these are:" + ", ".join(excluded))

    doc.add_paragraph("Now, we repeat the process for a few select countries")
    plt.figure()
    for location in country_list:
        df_owd = df_all[df_all['location'] == location]
        data = df_owd['new_cases'].to_list()
        try:
            addplot(data, location, ymin=-20)
        except Exception:
            # Best effort: locations for which addplot raises are left out of
            # this chart.
            continue
    _finish_soc_figure(doc,
                       "SOC for COVID-19 daily new cases data Series in select countries",
                       legend_columns=1,
                       legend_fontsize=6)
def _read_float_series(path):
    """Return the whitespace-separated numbers stored in the text file ``path`` as floats."""
    with open(path, 'r') as text_file:
        # Splitting on any whitespace tolerates a trailing newline or blank
        # lines, which would make ``float('')`` fail with ``split('\n')``.
        return [float(token) for token in text_file.read().split()]


def run(doc_report):
    """Append the "Exercise 8.1" continuous-wavelet comparison to ``doc_report``.

    For two data files from the ``mount`` directory, the USA COVID-19 series
    and six generated series, ``waipy.cwt`` is computed with the Morlet and
    with the DOG mother wavelet and the resulting plot image is inserted in
    the document. When a transform or its plot raises, the error message is
    written to the document instead.

    Args:
        doc_report: Report object providing ``add_heading`` and
            ``add_paragraph`` and exposing the underlying document as
            ``document`` (its ``add_picture`` is used).
    """
    doc_report.add_heading('Exercise 8', level=2)
    doc_report.add_heading('Exercise 8.1', level=3)
    doc_report.add_paragraph("""
      Here we compare the Continuous Wavelet Spectrum for time series generated with each signal generator used so far,
      along with provided data series. Both Morley and DOG wavelet charts are used."""
                             )

    mount_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              '..', 'mount')
    list_surftemp504 = _read_float_series(
        os.path.join(mount_path, 'surftemp504.txt'))
    print(len(list_surftemp504))

    list_sol3ghz = _read_float_series(os.path.join(mount_path, 'sol3ghz.dat'))
    print(len(list_sol3ghz))

    # The Henon map was noted to cause issues with the waipy module. It is
    # still attempted; a failure is reported in the document by the handlers
    # in the loop below.
    names = ('surftemp504', 'sol3ghz', 'USA_COVID19', 'GNRG', 'Color',
             'P_model_038_endogen_beta04', 'P_model_025_exogen_beta04',
             'logistic_rho3.88_tau1.1', 'henon_a1.38_b0.22')
    comments = (
        '',
        'Particularly for sol3ghz data set, we see two wavelet spectrum peaks, which indicates some '
        +
        'recurring feature of the signal that occurs in the time associated with around 8000 samples.',
        'The DOG wavelet seems to reveal some interesting pattern on the USA COVID-19 data around the 128 day'
        +
        'period, while the Morlet transform peaks the spectrum close to 64 days.',
        '', '', '', '', '', '')
    generators = (
        lambda: list_surftemp504,
        lambda: list_sol3ghz,
        lambda: getdata.acquire_data(date_ini='2020-02-20').new_cases.to_list(),
        lambda: grng.time_series(8192, 1),
        lambda: colorednoise.powerlaw_psd_gaussian(1, 8192),
        lambda: pmodel.pmodel(n_values=8192, p=0.38, slope=0.4)[1],
        lambda: pmodel.pmodel(n_values=8192, p=0.25, slope=0.4)[1],
        lambda: logis.logistic_series(3.88, 1.1, 8192)[1],
        lambda: henon.henon_series(
            np.random.uniform(1.35, 1.4), np.random.uniform(0.21, 0.31),
            8192)[1],
    )
    full_names = ('surftemp504 Dataset', 'sol3ghz Dataset',
                  'Daily new cases of COVID-19 in the USA',
                  'Non Gaussian Random Generator', 'Colored Noise Generator',
                  'P-Model Endogenous', 'P-Model Exogenous', 'Logistic Map',
                  'Henon Map')
    # (``mother`` argument for waipy, label used in the document headings)
    wavelets = (('Morlet', 'Morley'), ('DOG', 'DOG'))

    for name, func, full_name, comment in zip(names, generators, full_names,
                                              comments):
        data = func()
        doc_report.add_heading(full_name, level=3)

        # Image path without extension, relative to the working directory;
        # '.png' is appended when the picture is inserted.
        fig_name = os.path.relpath(os.path.join(mount_path, name + '_waipy'),
                                   os.getcwd())
        for mother, label in wavelets:
            # Both wavelets use the same image path; the picture is inserted
            # before the next wavelet overwrites the file.
            try:
                result = waipy.cwt(data,
                                   1,
                                   1,
                                   0.25,
                                   4,
                                   4 / 0.25,
                                   0.72,
                                   6,
                                   mother=mother,
                                   name='test name')
                waipy.wavelet_plot(fig_name,
                                   np.linspace(0, len(data), len(data)), data,
                                   0.03125, result)
                doc_report.add_heading(label + ":", level=4)
                doc_report.document.add_picture(fig_name + '.png',
                                                width=Inches(6))
            except Exception as e:
                # Report the failure in the document and carry on with the
                # next wavelet / data set.
                doc_report.add_heading(label + " could not be computed for " +
                                       full_name,
                                       level=4)
                doc_report.add_paragraph("The received error message was: \n" +
                                         str(e))
        plt.close('all')
        doc_report.add_paragraph(comment)
# Example #6
# 0
def run(doc, k_means_list):
    doc.add_heading('Exercise 6.2', level=2)
    doc.add_paragraph(
        "Using provided data sets and new cases of COVID-19 in the USA.")
    # open file:
    mount_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              '..', 'mount')
    with open(os.path.join(mount_path, 'surftemp504.txt'), 'r') as text_file:
        list1 = [float(x) for x in text_file.read().split('\n')]

    with open(os.path.join(mount_path, 'sol3ghz.dat'), 'r') as text_file:
        list2 = [float(x) for x in text_file.read().split('\n')]

    # Data for USA:
    df_owd = getdata.acquire_data(date_ini='2020-02-20')
    list3 = df_owd['new_cases'].to_list()

    df_table = pd.DataFrame()
    df_table['Data Set'] = [
        'surftemp504.txt', 'sol3ghz.dat', 'USA new COVID-19 cases'
    ]
    # Compute parameters:
    ds = [stat.series2datasetline(data) for data in (list1, list2, list3)]
    df_table['skewness²'] = [x['skewness']**2 for x in ds]
    df_table['kurtosis'] = [x['kurtosis'] for x in ds]
    alpha, beta_t, beta = [
        specplus.main(data) for data in (list1, list2, list3)
    ]
    plt.close('all')
    df_table[r'$\alpha$'] = alpha
    df_table[r'$\beta$'] = beta
    df_table[r'$\beta_t$'] = beta_t

    if k_means_list is None:
        pass
    else:
        parameter_spaces = (['skewness²', 'kurtosis', r'$\beta$'],
                            ['skewness²', 'kurtosis', r'$\alpha$'])
        for grouping, parameters in zip(k_means_list, parameter_spaces):
            df_table[" x ".join(parameters)] = grouping.predict(
                df_table[parameters])

    print_table.render_mpl_table(df_table,
                                 col_width=3.0,
                                 bbox=None,
                                 font_size=12)
    doc.add_fig()
    doc.add_paragraph(
        "All datasets would get classified on the same group, if P-Model was included on the "
        +
        "training data, since the grouping would basically be dominated by the high kurtosis "
        +
        "and skewness of p-model series.\nIt can be seen that the behavior of the US series was "
        +
        "clustered along with the colored and non-gaussian noises, which makes sense assuming that "
        +
        "the number of new daily cases should be directly proportional to the number active cases, "
        +
        "more specifically the number of people in the contagious stage of the disease, which in "
        +
        "turn is dependent on the number of new cases of a few preceding days. While this makes sense "
        +
        "for more closed regions, the US seems to have a dynamics of the disease spreading at "
        +
        "exponential rate initially once it reaches a new state or city, thus the closer similarity to "
        + "the Non-Gaussian noise behavior instead of the colored noise.\n")
    doc.add_paragraph(
        "Meanwhile, the data series in surftemp504.txt and sol3ghz.dat, are clustered with the random "
        +
        "noise in the space with beta, but particularly the sol3ghz.dat gets isolated in its own group "
        +
        "when clustering with alpha. Given the correlation between alpha and beta, this might just be "
        +
        "an effect of the lack of normalization or scaling prior to applying the K-Means technique. It "
        + "might also be an effect of the data being")