예제 #1
0
def main(args: argparse.Namespace) -> ResultValue:
    """Add the daily-difference columns to the reduced report and save it.

    Loads ``../data/reduced_report_data.csv`` (relative to this module), adds
    one ``"D - <column>"`` column per source column through
    ``calculate_daily_diffs`` and writes the outcome to
    ``../data/report_data.csv`` with ``owerwrite=True``.

    Args:
        args: Parsed command-line arguments; not read by this function.

    Returns:
        ``ResultOk(None)`` when loading, every diff computation and the save
        succeeded; otherwise the result of the step that failed, or a
        ``ResultKo`` wrapping the exception raised along the way.
    """
    log = logging.getLogger('Main')
    log.info(" >>")
    rv: ResultValue = ResultKo(Exception("Error"))
    try:
        data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                "..", "data")
        report_file = os.path.join(data_dir, "report_data.csv")

        result = load_data_file(
            data_file=os.path.join(data_dir, "reduced_report_data.csv"))
        for col in ["CASI TOTALI - A", "DECEDUTI"]:
            # is_ok is a method: without the call the bound method itself is
            # tested, which is always truthy, so a failed step was never seen.
            if not result.is_ok():
                break
            result = calculate_daily_diffs(cast(pd.DataFrame, result()),
                                           in_col=col,
                                           out_col="D - {c}".format(c=col))
        if result.is_ok():
            result = save_data_file(cast(pd.DataFrame, result()),
                                    report_file,
                                    owerwrite=True)

        # Hand back the failing step's own result instead of a generic error.
        rv = ResultOk(None) if result.is_ok() else result

        # NOTE(review): the saved file is read back but the outcome is not
        # inspected; kept as in the original - confirm whether it is needed.
        load_data_file(data_file=report_file)

    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        rv = ResultKo(ex)
    log.info(" ({rv}) <<".format(rv=rv))
    return rv
예제 #2
0
def save_data_file(df: pd.DataFrame,
                   data_file_out: str,
                   sorting_col: str = "REPORT DATE",
                   owerwrite: bool = False) -> ResultValue:
    """Sort ``df`` and write it to ``data_file_out`` as CSV.

    When the target file already exists its first row must match the columns
    of ``df``; this check is performed both when appending and when
    overwriting. Without ``owerwrite`` the rows are appended and no header is
    written; otherwise the file is rewritten with a header.

    Note that ``df`` is sorted in place, so the caller's frame is reordered.

    Args:
        df: Data to save.
        data_file_out: Path of the CSV file to write.
        sorting_col: Column the rows are sorted by before saving.
        owerwrite: Replace an existing file instead of appending to it.

    Returns:
        ``ResultOk(df)`` with the sorted frame, or ``ResultKo`` wrapping the
        header-mismatch error or any exception raised while saving.
    """
    log = logging.getLogger('save_data_file')
    log.info(" >>")
    try:
        mode = 'w'
        header = True
        column_list = df.columns.values
        df.sort_values(by=[sorting_col], inplace=True)
        if os.path.isfile(data_file_out):
            if not owerwrite:
                mode = 'a'
                header = False
            with open(data_file_out) as fh:
                csv_headings = next(csv.reader(fh))
            if csv_headings != list(column_list):
                ex = Exception(
                    "Columns differnt from file header\n {l1}\n {l2}\n".
                    format(l1=column_list, l2=csv_headings))
                # The original logged "Error in date translation" here, a
                # copy/paste leftover that pointed at the wrong problem.
                log.error("Header mismatch - {e}".format(e=ex))
                return ResultKo(ex)
        log.info("Save to: {f} headers: {h}".format(f=data_file_out, h=header))
        df.to_csv(data_file_out, mode=mode, header=header, index=False)

    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(df)
예제 #3
0
def save_df_to_csv(df: pd.DataFrame, csv_file_name: str,
                   column_list: List[str], sorting_col: str) -> ResultValue:
    """Write the selected columns of ``df`` to ``csv_file_name``.

    A new file is created with a header row. When the file already exists the
    rows are appended without a header, provided that the first row of the
    file matches ``column_list``.

    Args:
        df: Source data; it is not modified.
        csv_file_name: Path of the CSV file to create or extend.
        column_list: Columns to save, in output order.
        sorting_col: Column the rows are sorted by before saving.

    Returns:
        ``ResultOk(True)`` on success, or ``ResultKo`` wrapping the
        header-mismatch error or any exception raised while saving.
    """
    log = logging.getLogger('save_df_to_csv')
    log.info(" >>")
    try:
        mode = 'w'
        header = True
        # Sort a fresh copy: an in-place sort on the .loc selection can trigger
        # pandas' SettingWithCopyWarning and gains nothing.
        df = df.loc[:, column_list].sort_values(by=[sorting_col])
        if os.path.isfile(csv_file_name):
            header = False
            mode = 'a'
            with open(csv_file_name) as fh:
                csv_headings = next(csv.reader(fh))
            # list() keeps the comparison meaningful for any sequence type.
            if csv_headings != list(column_list):
                ex = Exception(
                    "Columns differnt from file header\n {l1}\n {l2}\n".
                    format(l1=column_list, l2=csv_headings))
                # The original logged "Error in date translation" here, a
                # copy/paste leftover that pointed at the wrong problem.
                log.error("Header mismatch - {e}".format(e=ex))
                return ResultKo(ex)
        log.info("Save to: {f} headers: {h}".format(f=csv_file_name, h=header))
        df.to_csv(csv_file_name, mode=mode, header=header, index=False)

    except Exception as ex:
        log.error(" failed - {ex}".format(ex=ex))
        return ResultKo(ex)

    log.info(" <<")
    return ResultOk(True)
예제 #4
0
def age_distribution(df: pd.DataFrame,
                     ax: mp.axes.Axes,
                     gender: str = "F") -> ResultValue:
    """Draw a pie chart of the vaccinations grouped by ``fascia_anagrafica``.

    Args:
        df: Data with the columns ``fascia_anagrafica``, ``sesso_femminile``
            and ``sesso_maschile``.
        ax: Axes the pie is drawn on.
        gender: ``"F"`` for the female counts, ``"M"`` for the male counts,
            ``"B"`` for their sum; case-insensitive.

    Returns:
        ``ResultOk(True)`` on success, or ``ResultKo`` for an unknown gender
        or any exception raised while plotting.
    """
    log = logging.getLogger('age_distribution')
    log.info(" >>")
    try:
        column_by_gender = {
            "F": "sesso_femminile",
            "M": "sesso_maschile",
            "B": "totals"
        }
        # Select with the same upper-cased key used for validation: before,
        # "f" or "m" passed the check but then silently plotted the totals.
        gender_key = gender.upper()
        if gender_key not in column_by_gender:
            msg = "Geneder {v} value not known".format(v=gender)
            log.error(msg)
            return ResultKo(Exception(msg))

        by_age = df.groupby(["fascia_anagrafica"]).sum()
        by_age.reset_index(level=0, inplace=True)
        by_age["totals"] = by_age["sesso_femminile"] + by_age["sesso_maschile"]

        values = by_age[column_by_gender[gender_key]]
        labels = by_age["fascia_anagrafica"]
        # NOTE(review): ``colors`` is not defined in this function; presumably
        # a module-level palette - confirm.
        ax.pie(values, labels=labels, autopct='%1.1f%%', colors=colors)
        ax.set_title("Distribuzione per eta'", fontsize=18)

    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(True)
예제 #5
0
def save_content_to_file(file_name: str, content: bytes) -> ResultValue:
    """Write ``content`` to ``file_name`` in binary mode.

    Returns:
        ``ResultOk(True)`` once the bytes are written, or ``ResultKo``
        wrapping the exception raised while opening or writing the file.
    """
    logger = logging.getLogger('save_content_to_file')
    try:
        with open(file_name, "wb") as out_file:
            out_file.write(content)
    except Exception as err:
        logger.error("save_content_to_file failed - {ex}".format(ex=err))
        return ResultKo(err)
    return ResultOk(True)
예제 #6
0
def load_date_range_reports(begin: dt.datetime, to: dt.datetime,
                            context: dict) -> ResultValue:
    """Run ``append_new_data`` for every date produced by ``daterange``.

    Processing stops at the first date whose import fails.

    Args:
        begin: First bound passed to ``daterange``.
        to: Second bound passed to ``daterange``.
        context: Settings forwarded unchanged to ``append_new_data``.

    Returns:
        ``ResultOk`` wrapping the result of the last ``append_new_data`` call
        (``None`` when the range produced no date), or ``ResultKo`` when an
        import failed or an exception was raised.
    """
    log = logging.getLogger('load_date_range_reports')
    log.info(" >>")
    # Bound up front: with an empty range the loop body never runs and the
    # final return used to raise UnboundLocalError outside the try block.
    last_result = None
    try:
        for single_date in daterange(begin, to):
            report_date = single_date.strftime("%d/%m/%Y")
            last_result = append_new_data(report_date, context)
            if last_result.is_in_error():
                # Record the underlying cause before returning the summary.
                log.error("append_new_data failed for {d} - {e}".format(
                    d=report_date, e=last_result()))
                return ResultKo(Exception("Failure in append_new_data."))

    except Exception as ex:
        log.error(" failed - {ex}".format(ex=ex))
        return ResultKo(ex)

    log.info(" <<")
    return ResultOk(last_result)
예제 #7
0
def company_distribution(df: pd.DataFrame, ax: mp.axes.Axes) -> ResultValue:
    """Draw a pie chart of ``numero_dosi`` summed per ``fornitore``.

    Each wedge is annotated with its absolute number of doses rather than
    with a percentage.

    Args:
        df: Data with the columns ``fornitore`` and ``numero_dosi``.
        ax: Axes the pie is drawn on.

    Returns:
        ``ResultOk(True)`` on success, or ``ResultKo`` wrapping the exception
        raised while grouping or plotting.
    """
    log = logging.getLogger('company_distribution')
    log.info(" >>")
    try:

        def autopct_format(values):
            """Build a formatter turning a wedge percentage into a count."""
            # The total does not change between wedges: compute it once.
            total = sum(values)

            def my_format(pct):
                val = int(round(pct * total / 100.0))
                return '{v:d}'.format(v=val)

            return my_format

        colors = [
            "#9aff33", "#34ff33", "#33ff98", "#33fffe", "#339aff", "#3371ff",
            "#5b33ff", "#c133ff", "#ff33d7"
        ]
        by_company = df.groupby(["fornitore"]).sum()
        by_company.reset_index(level=0, inplace=True)
        values = by_company["numero_dosi"]
        labels = by_company["fornitore"]
        ax.pie(values,
               labels=labels,
               colors=colors,
               autopct=autopct_format(values))
        ax.set_title("Vaccini consegnati", fontsize=18)

    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(True)
예제 #8
0
def plot_delivered_vaccines_quantity(df_delivered: pd.DataFrame,
                                     ax: mp.axes.Axes) -> ResultValue:
    """Plot the cumulative number of delivered doses over time on ``ax``.

    Args:
        df_delivered: Deliveries with the columns ``data_consegna`` and
            ``numero_dosi``; it is not modified.
        ax: Axes to draw on.

    Returns:
        ``ResultOk`` wrapping the list of lines returned by ``ax.plot``, or
        ``ResultKo`` wrapping the exception raised while plotting.
    """
    log = logging.getLogger('plot_delivered_vaccines_quantity')
    log.info(" >>")
    rv: ResultValue = ResultKo(Exception("Error"))
    try:
        line_label = "Dosi consegnate - somma"
        line_color = "#ff5733"

        # Sort a copy so that the caller's frame is not reordered.
        ordered = df_delivered.sort_values(by="data_consegna")
        by_date = ordered.groupby(["data_consegna"]).sum()
        by_date.reset_index(level=0, inplace=True)
        by_date["cumulata"] = by_date["numero_dosi"].cumsum()

        x_del = by_date["data_consegna"]
        y_del = by_date["cumulata"]

        remove_tick_lines('x', ax)
        remove_tick_lines('y', ax)
        set_axes_common_properties(ax, no_grid=True)
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y"))
        ax.xaxis.set_minor_formatter(mdates.DateFormatter("%d/%m"))
        ax.xaxis.set_major_locator(mdates.DayLocator(interval=2))

        ax.scatter(x_del, y_del, s=30, marker='.')
        # '-' instead of 'b-': the colour comes from the keyword; naming it in
        # the format string as well makes matplotlib warn about a redundant
        # definition.
        line = ax.plot(x_del,
                       y_del,
                       '-',
                       linewidth=2,
                       color=line_color,
                       label=line_label)

        # NOTE(review): setting tick labels after the date formatter/locator
        # looks like it replaces the formatter configured above - confirm the
        # intended labels.
        ax.set_xticklabels(x_del, rotation=80)

        handles, labels = ax.get_legend_handles_labels()
        patch = mpatches.Patch(color=line_color, label=line_label)
        handles.append(patch)
        # Attach the legend to the axes received as argument; plt.legend acts
        # on whatever axes happen to be current.
        ax.legend(handles=handles, loc='upper left')

        rv = ResultOk(line)

    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        rv = ResultKo(ex)
    log.info(" <<")
    return rv
예제 #9
0
def get_web_file(url: str) -> ResultValue:
    """Download ``url`` and return the raw response body.

    Args:
        url: Address of the resource to fetch.

    Returns:
        ``ResultOk`` wrapping the response content as bytes when the status
        code is one of ``ok_statuses``; otherwise ``ResultKo`` describing the
        status code received or wrapping the exception raised by the request.
    """
    log = logging.getLogger('get_web_file')
    log.info(" >>")
    log.info("Url: {u}".format(u=url))
    try:
        response = requests.get(url)
        if response.status_code not in ok_statuses:
            # Report the actual status instead of a generic "Error", and log
            # it as an error since the caller receives a failure.
            msg = "Get data failed. Received error code: {er}".format(
                er=str(response.status_code))
            log.error(msg)
            rv: ResultValue = ResultKo(Exception(msg))
        else:
            rv = ResultOk(response.content)
    except Exception as ex:
        log.error(" failed - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info("get_web_file ({rv}) <<".format(rv=rv))
    return rv
예제 #10
0
def create_delivered_dataframe(data_file: str) -> ResultValue:
    """Read the comma-separated ``data_file``, parsing ``data_consegna`` as dates.

    Returns:
        ``ResultOk`` wrapping the loaded frame, or ``ResultKo`` wrapping the
        exception raised by the read.
    """
    logger = logging.getLogger('create_delivered_dataframe')
    logger.info(" >>")
    try:
        delivered = pd.read_csv(data_file,
                                sep=',',
                                parse_dates=["data_consegna"])
    except Exception as err:
        logger.error("Exception caught - {ex}".format(ex=err))
        return ResultKo(err)
    else:
        logger.info(" <<")
        return ResultOk(delivered)
예제 #11
0
def main(args: argparse.Namespace) -> bool:
    """Import a range of daily reports, or print the range already stored.

    With ``args.date_range`` (two ``dd/mm/YYYY`` strings) every report in the
    range is imported into ``../data/reduced_report_data.csv`` through
    ``load_date_range_reports``. With ``args.get_date_range`` the oldest and
    newest ``REPORT DATE`` of that file and its number of rows are printed and
    logged. Otherwise nothing is done.

    Args:
        args: Parsed command-line arguments; ``date_range`` and
            ``get_date_range`` are read.

    Returns:
        ``True`` when the requested action succeeded, ``False`` for an invalid
        range, a failed import or any exception.
    """
    log = logging.getLogger('Main')
    log.info(" >>")
    rv: ResultValue = ResultKo(Exception("Error"))
    try:
        date_format = '%d/%m/%Y'
        data_file_name = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..", "data",
            "reduced_report_data.csv")
        if args.date_range is not None:
            begin_dt = dt.datetime.strptime(args.date_range[0], date_format)
            end_dt = dt.datetime.strptime(args.date_range[1], date_format)
            if end_dt < begin_dt:
                # Print the relation that actually holds (end before begin).
                log.error("Wrong date range: {e} < {b}".format(b=begin_dt,
                                                               e=end_dt))
                return False

            columns_report_charts = [
                "REPORT DATE", "Regione", "Ricoverati con sintomi",
                "Terapia intensiva", "Totale attualmente positivi", "DECEDUTI",
                "Isolamento domiciliare", "CASI TOTALI - A",
                "Totale tamponi effettuati", "SCHEMA VERSION"
            ]
            temp_content_dir = os.path.join(os.sep, 'tmp')
            # Keep the outcome of the import: it used to be overwritten by an
            # unconditional ResultOk(True), which hid every failure.
            rv = load_date_range_reports(begin=begin_dt,
                                         to=end_dt,
                                         context={
                                             "temp_dir": temp_content_dir,
                                             "data file": data_file_name,
                                             "columns": columns_report_charts,
                                             "save": True,
                                             "sort column": "REPORT DATE"
                                         })
            if rv.is_in_error():
                log.error("Loading the reports failed - {e}".format(e=rv()))

        elif args.get_date_range:
            df = pd.read_csv(data_file_name, sep=',')
            # shape[0] is the number of rows; the whole shape tuple used to be
            # printed under the "numero righe" label.
            msg = "Data minima: {dmin} - data massima: {dmax} - numero righe: {nr}".format(
                nr=df.shape[0],
                dmin=df["REPORT DATE"].min(),
                dmax=df["REPORT DATE"].max())
            print(msg)
            log.info(msg)
            rv = ResultOk(True)
        else:
            # The message was built but never emitted.
            log.info("Nothing to do!")
            rv = ResultOk(True)

    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return False
    log.info(" (Is ok: {rv}) <<".format(rv=rv.is_ok()))
    return rv.is_ok()
예제 #12
0
def download_csv_file(url: str, data_file: str) -> ResultValue:
    """Download ``url`` and store the response body in ``data_file``.

    Args:
        url: Address of the CSV file.
        data_file: Path of the local file to write.

    Returns:
        ``ResultOk(True)`` when the server answered 200 and the file was
        written; otherwise ``ResultKo`` carrying the HTTP reason or the
        exception raised.
    """
    log = logging.getLogger('download_csv_file')
    log.info(" >>")
    rv: ResultValue = ResultKo(Exception("Error"))
    try:
        result = requests.get(url)
        if result.status_code == 200:
            # Store the bytes exactly as received: writing the decoded text
            # re-encodes it with the platform default encoding, which can fail
            # or alter non-ASCII characters.
            with open(data_file, "wb") as out_file:
                out_file.write(result.content)
            rv = ResultOk(True)
        else:
            msg = "Error downloading the data file: {e}.".format(
                e=result.reason)
            log.error(msg)
            rv = ResultKo(Exception(msg))

    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        rv = ResultKo(ex)
    log.info(" <<")
    return rv
예제 #13
0
def translate_to_date(report_date: List[str]) -> ResultValue:
    """Convert ``[day, Italian month name, year, ...]`` into a datetime.

    Args:
        report_date: Date parts; the first three items are read as day, month
            name (case-insensitive) and year. Further items are ignored.

    Returns:
        ``ResultOk`` wrapping the ``datetime``, or ``ResultKo`` when fewer
        than three parts are given, the month name is unknown, or the values
        do not form a valid date.
    """
    log = logging.getLogger('data_downloader')
    months_names = {
        "gennaio": 1,
        "febbraio": 2,
        "marzo": 3,
        "aprile": 4,
        "maggio": 5,
        "giugno": 6,
        "luglio": 7,
        "agosto": 8,
        "settembre": 9,
        "ottobre": 10,
        "novembre": 11,
        "dicembre": 12
    }
    if len(report_date) < 3:
        # Show the offending input; the original formatted the ``dt`` module
        # here instead of the argument.
        exc = Exception("Wrong format: {dt}".format(dt=str(report_date)))
        log.error("Error in date translation - {e}".format(e=exc))
        return ResultKo(exc)

    try:
        day = report_date[0]
        month_name = report_date[1]
        year = report_date[2]
        month = months_names.get(month_name.lower())
        if month is None:
            ex = Exception("Unknown month: {m}".format(m=month_name))
            log.error("Error in date translation - {e}".format(e=ex))
            return ResultKo(ex)
        date = dt.datetime(year=int(year), month=int(month), day=int(day))
    except Exception as ex:
        log.error("Exception - {e}".format(e=ex))
        return ResultKo(ex)
    return ResultOk(date)
예제 #14
0
def create_dataframe(pdf_url: str, local_file_path: str,
                     pdf_version: str) -> ResultValue:
    """Download a report PDF and turn it into a normalised regions frame.

    The steps are: ``get_web_file``, ``save_content_to_file``,
    ``pdf_to_dataframe`` and ``refactor_region_df``; each one runs only when
    the previous one succeeded.

    Args:
        pdf_url: Address of the PDF report.
        local_file_path: Where the downloaded PDF is stored.
        pdf_version: Schema version forwarded to ``refactor_region_df``.

    Returns:
        The result of ``refactor_region_df`` when every earlier step
        succeeded; otherwise a generic ``ResultKo``, or a ``ResultKo``
        wrapping the exception raised along the way.
    """
    log = logging.getLogger('create_dataframe')
    log.info(" >>")
    ret_data_frame: ResultValue = ResultKo(Exception("Error"))
    try:
        file_downloaded_rv = get_web_file(pdf_url)
        # is_ok has to be called; the bare bound method is always truthy.
        if file_downloaded_rv.is_ok():
            # save_content_to_file returns a ResultValue: ask it for the
            # outcome instead of comparing the object itself with True.
            saved_rv = save_content_to_file(
                local_file_path, cast(bytes, file_downloaded_rv()))
            if saved_rv.is_ok():
                to_df_rv = pdf_to_dataframe(local_file_path)
                if to_df_rv.is_ok():
                    df, report_date = to_df_rv()
                    ret_data_frame = refactor_region_df(
                        df, report_date, pdf_version)

    except Exception as ex:
        log.error(" failed - {ex}".format(ex=ex))
        return ResultKo(ex)

    # The original formatted an undefined name ``rv`` here, which raised
    # NameError on every call that reached this line.
    log.info(" ({rv}) <<".format(rv=ret_data_frame))
    return ret_data_frame
예제 #15
0
def calculate_daily_diffs(df: pd.DataFrame, in_col: str,
                          out_col: str) -> ResultValue:
    """Store in ``out_col`` the row-to-row difference of ``in_col`` per region.

    The difference is computed separately for the rows of every distinct
    ``Regione`` value, in their current order, and written back into ``df``
    itself.

    Returns:
        ``ResultOk`` wrapping the same (modified) frame, or ``ResultKo``
        wrapping the exception raised.
    """
    logger = logging.getLogger('calculate_daily_diffs')
    logger.info("({oc}) >>".format(oc=out_col))
    try:
        for region_name in df["Regione"].unique():
            in_region = df["Regione"] == region_name
            deltas = df.loc[in_region, in_col].diff(periods=1)
            df.loc[in_region, out_col] = deltas
    except Exception as err:
        logger.error("Exception caught - {ex}".format(ex=err))
        return ResultKo(err)
    else:
        logger.info(" <<")
        return ResultOk(df)
예제 #16
0
def create_dataframe(data_file: str) -> ResultValue:
    """Load the comma-separated ``data_file`` and add a ``totali`` column.

    ``data_somministrazione`` is parsed as dates and ``totali`` is the sum of
    ``sesso_maschile`` and ``sesso_femminile``.

    Returns:
        ``ResultOk`` wrapping the frame, or ``ResultKo`` wrapping the
        exception raised while reading or summing.
    """
    logger = logging.getLogger('create_dataframe')
    logger.info(" >>")
    try:
        frame = pd.read_csv(data_file,
                            sep=',',
                            parse_dates=["data_somministrazione"])
        males = frame["sesso_maschile"]
        females = frame["sesso_femminile"]
        frame["totali"] = males + females
    except Exception as err:
        logger.error("Exception caught - {ex}".format(ex=err))
        return ResultKo(err)
    else:
        logger.info(" <<")
        return ResultOk(frame)
예제 #17
0
def load_data_file(data_file: str) -> ResultValue:
    """Read the comma-separated report file ``data_file``.

    ``REPORT DATE`` is parsed as dates; ``Ricoverati con sintomi`` and
    ``CASI TOTALI - A`` are read as 64-bit integers.

    Returns:
        ``ResultOk`` wrapping the frame, or ``ResultKo`` wrapping the
        exception raised by the read.
    """
    logger = logging.getLogger('load_data_file')
    logger.info(" >>")
    integer_columns = {
        "Ricoverati con sintomi": np.int64,
        "CASI TOTALI - A": np.int64
    }
    try:
        report = pd.read_csv(data_file,
                             sep=',',
                             parse_dates=["REPORT DATE"],
                             dtype=integer_columns)
    except Exception as err:
        logger.error("Exception caught - {ex}".format(ex=err))
        return ResultKo(err)
    else:
        logger.info(" <<")
        return ResultOk(report)
예제 #18
0
def get_version_from_date(date: dt.datetime) -> ResultValue:
    """Return the version label associated with ``date``.

    The thresholds are checked from the most recent to the oldest and the
    first one that ``date`` reaches determines the label.

    Returns:
        ``ResultOk`` wrapping ``"v6"``, ``"v1"`` or ``"v5"``, or ``ResultKo``
        when ``date`` precedes the oldest threshold.
    """
    logger = logging.getLogger('get_version_from_date')
    logger.info(" >>")
    # (first valid day, label), newest first.
    thresholds = (
        ("03/12/2020", "v6"),
        ("25/06/2020", "v1"),
        ("01/05/2020", "v5"),
    )
    for first_day, label in thresholds:
        if date >= dt.datetime.strptime(first_day, '%d/%m/%Y'):
            logger.info(" <<")
            return ResultOk(label)

    err = Exception("Unable to find a valid version for {d}".format(d=date))
    logger.error("Error {e}".format(e=err))
    return ResultKo(err)
예제 #19
0
def append_new_data(report_date: str, context: dict) -> ResultValue:
    """Download, parse and optionally store the regional report of one day.

    The PDF named after the date is fetched with ``get_web_file``, saved under
    ``context["temp_dir"]``, parsed with ``pdf_to_dataframe`` and normalised
    with ``refactor_region_df``. When ``context["save"]`` is true the outcome
    is appended to ``context["data file"]`` through ``save_df_to_csv`` using
    ``context["columns"]`` and ``context["sort column"]``.

    Args:
        report_date: Day of the report in ``dd/mm/YYYY`` format.
        context: Settings read through the keys listed above.

    Returns:
        ``ResultOk`` wrapping the result object returned by
        ``refactor_region_df``, or ``ResultKo`` for the first step that failed
        or for any exception raised.
    """
    logger = logging.getLogger('append_new_data')
    logger.info(" >>")
    try:
        day = dt.datetime.strptime(report_date, '%d/%m/%Y')

        version_rv = get_version_from_date(day)
        if version_rv.is_in_error():
            return ResultKo(version_rv())

        pdf_name = "dpc-covid19-ita-scheda-regioni-{y}{m}{d}.pdf".format(
            y=day.year,
            m=str(day.month).zfill(2),
            d=str(day.day).zfill(2))
        pdf_url = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/schede-riepilogative/regioni/{fn}".format(
            fn=pdf_name)
        logger.info("Url: {u}".format(u=pdf_url))

        download_rv = get_web_file(pdf_url)
        if download_rv.is_in_error():
            return ResultKo(download_rv())

        local_pdf = os.path.join(context["temp_dir"], pdf_name)
        write_rv = save_content_to_file(local_pdf,
                                        cast(bytes, download_rv()))
        if write_rv.is_in_error():
            return ResultKo(Exception("Error in save_content_to_file."))

        parsed_rv = pdf_to_dataframe(local_pdf)
        if parsed_rv.is_in_error():
            return ResultKo(Exception("Error in pdf_to_dataframe."))

        raw_df, read_date = parsed_rv()
        df_regions = refactor_region_df(raw_df, read_date, version_rv())
        if df_regions.is_in_error():
            return ResultKo(df_regions())

        if context["save"] == True:
            stored_rv = save_df_to_csv(df_regions(), context["data file"],
                                       context["columns"],
                                       context["sort column"])
            if stored_rv.is_in_error():
                return ResultKo(stored_rv())
    except Exception as err:
        logger.error("append_new_data failed - {ex}".format(ex=err))
        return ResultKo(err)

    logger.info(" <<")
    return ResultOk(df_regions)
예제 #20
0
def main(args: argparse.Namespace) -> ResultValue:
    """Chart total cases and deaths over time for Lombardia.

    Loads ``../data/report_data.csv`` (relative to this module), keeps the
    rows of the region sorted by ``REPORT DATE`` and hands them to
    ``chart_composite``.

    Args:
        args: Parsed command-line arguments; not read by this function.

    Returns:
        ``ResultOk(True)`` when the chart was built, ``ResultKo`` when the
        data could not be loaded, or the failed ``chart_composite`` result.
    """
    log = logging.getLogger('Main')
    log.info(" >>")
    data_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..",
                             "data", "report_data.csv")
    result = load_data_file(data_file=data_file)
    if result.is_in_error():
        return ResultKo(Exception("load data failed."))
    df = result()

    region_name = 'Lombardia'
    region_df = df.loc[df['Regione'] == region_name, :]
    region_df = region_df.sort_values(["REPORT DATE"])
    report_dates = region_df["REPORT DATE"]
    deaths = region_df["DECEDUTI"]
    total_cases = region_df["CASI TOTALI - A"]

    # chart_composite labels y_one "Totale ammalati" and y_two
    # "Totale deceduti": pass the series in that order (they were swapped).
    chart_rv = chart_composite(x=report_dates,
                               y_one=total_cases,
                               y_two=deaths,
                               region_name=region_name)
    # Do not report success when the chart could not be built.
    if chart_rv.is_in_error():
        log.error("chart_composite failed - {e}".format(e=chart_rv()))
        return chart_rv
    log.info(" <<")
    return ResultOk(True)
예제 #21
0
def chart_vaccinations_male_female(df: pd.DataFrame,
                                   ax: mp.axes.Axes) -> ResultValue:
    """Draw on ``ax`` a pie of the ``sesso_femminile`` / ``sesso_maschile`` sums.

    Returns:
        ``ResultOk(True)`` on success, or ``ResultKo`` wrapping the exception
        raised while summing or plotting.
    """
    logger = logging.getLogger('chart_vaccinations_male_female')
    logger.info(" >>")
    try:
        male_total = df["sesso_maschile"].sum()
        female_total = df["sesso_femminile"].sum()

        # (label, value, colour) for every wedge, in drawing order.
        wedges = (
            ("Donne", female_total, "#f1a29b"),
            ("Uomini", male_total, "#9bd7f1"),
        )
        wedge_labels = [label for label, _, _ in wedges]
        wedge_values = [value for _, value, _ in wedges]
        wedge_colors = [color for _, _, color in wedges]

        ax.pie(wedge_values,
               labels=wedge_labels,
               colors=wedge_colors,
               autopct='%1.1f%%')
        ax.set_title("Distribuzione per genere", fontsize=18)
    except Exception as err:
        logger.error("Exception caught - {ex}".format(ex=err))
        return ResultKo(err)
    else:
        logger.info(" <<")
        return ResultOk(True)
예제 #22
0
def chart_vaccinations_fornitore(df: pd.DataFrame,
                                 ax: mp.axes.Axes) -> ResultValue:
    """Draw on ``ax`` a pie of the vaccinations summed per ``fornitore``.

    The value of every wedge is ``sesso_maschile`` plus ``sesso_femminile``
    for that supplier.

    Returns:
        ``ResultOk(True)`` on success, or ``ResultKo`` wrapping the exception
        raised while grouping or plotting.
    """
    logger = logging.getLogger('chart_vaccinations_fornitore')
    logger.info(" >>")
    palette = ["#dfeef4", "#c2e7f6", "#7fd2f3"]
    try:
        per_supplier = df.groupby(["fornitore"]).sum()
        males = per_supplier["sesso_maschile"]
        females = per_supplier["sesso_femminile"]
        per_supplier["totals"] = males + females
        per_supplier = per_supplier.reset_index(level=0)

        ax.pie(per_supplier["totals"],
               labels=per_supplier["fornitore"],
               colors=palette,
               autopct='%1.1f%%')
        ax.set_title("Distribuzione per fornitore", fontsize=18)
    except Exception as err:
        logger.error("Exception caught - {ex}".format(ex=err))
        return ResultKo(err)
    else:
        logger.info(" <<")
        return ResultOk(True)
예제 #23
0
def plot_vaccinations_by_time(df: pd.DataFrame,
                              df_delivered: pd.DataFrame,
                              ax: mp.axes.Axes,
                              wich: str = "first") -> ResultValue:
    """Plot first-dose vaccinations over time together with the deliveries.

    On ``ax`` the cumulative sum of ``prima_dose`` per
    ``data_somministrazione`` is drawn, followed by the delivered-doses curve
    produced by ``plot_delivered_vaccines_quantity``; the daily ``prima_dose``
    totals go on a twin y axis.

    Args:
        df: Vaccinations with the columns ``data_somministrazione`` and
            ``prima_dose``.
        df_delivered: Deliveries forwarded to
            ``plot_delivered_vaccines_quantity``.
        ax: Axes to draw on.
        wich: Not used by this function; kept for interface compatibility.

    Returns:
        ``ResultOk(True)`` on success, the failed result of
        ``plot_delivered_vaccines_quantity``, or ``ResultKo`` wrapping the
        exception raised.
    """
    log = logging.getLogger('plot_vaccinations_by_time')
    log.info(" >>")
    try:
        ln_one_color = "#f08814"
        ln_two_color = "#92b7e9"
        ln_one_label = "Cumulata numero vaccinazioni"
        ln_two_label = "Distribuzione giornaliera"

        grp_by_time = df.groupby("data_somministrazione").sum()
        x = grp_by_time.index.values
        y = grp_by_time["prima_dose"]
        y_cum_sum = grp_by_time["prima_dose"].cumsum()

        set_axes_common_properties(ax, no_grid=False)
        # Thousands separator on the y ticks.
        ax.get_yaxis().set_major_formatter(
            mp.ticker.FuncFormatter(
                lambda value, _pos: format(int(value), ',')))

        remove_tick_lines('x', ax)
        remove_tick_lines('y', ax)

        ax.set_xticks(x)
        ax.set_xticklabels(x, rotation=80)
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y"))
        ax.xaxis.set_minor_formatter(mdates.DateFormatter("%d/%m"))
        ax.xaxis.set_major_locator(mdates.DayLocator(interval=2))
        ax.set_ylabel(ln_one_label, fontsize=14)
        ax.set_xlabel("Data", fontsize=14)
        ax.set_title("Vaccinazioni nel tempo - prima dose", fontsize=18)
        ax.tick_params(axis='y', colors=ln_one_color)
        ax.yaxis.label.set_color(ln_one_color)

        ax.scatter(x, y_cum_sum, color=ln_one_color, s=30, marker='.')
        # '-' instead of 'b-': the colour comes from the keyword; naming it in
        # the format string as well makes matplotlib warn about a redundant
        # definition.
        ln_one = ax.plot(x,
                         y_cum_sum,
                         '-',
                         linewidth=2,
                         color=ln_one_color,
                         label=ln_one_label)

        result = plot_delivered_vaccines_quantity(df_delivered, ax)
        if result.is_in_error():
            log.error(result())
            return result
        line_three = result()

        ax_dec = ax.twinx()

        remove_tick_lines('y', ax_dec)
        remove_tick_lines('x', ax_dec)

        set_axes_common_properties(ax_dec, no_grid=True)

        ax_dec.scatter(x, y, color=ln_two_color, s=30, marker='.')
        ln_two = ax_dec.plot(x,
                             y,
                             '-',
                             linewidth=2,
                             color=ln_two_color,
                             label=ln_two_label)

        ax_dec.set_ylabel(ln_two_label, fontsize=14)
        ax_dec.yaxis.label.set_color(ln_two_color)
        ax_dec.tick_params(axis='y', colors=ln_two_color)

        # One legend for the lines of both axes.
        lns = ln_one + ln_two + line_three
        labs = [line.get_label() for line in lns]
        ax.legend(lns, labs, loc='upper left')

    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(True)
예제 #24
0
def chart_composite(x: pd.Series, y_one: pd.Series, y_two: pd.Series,
                    region_name: str) -> ResultValue:
    """Build a two-axis time chart for one region.

    ``y_one`` is drawn on the left axis with the legend entry
    "Totale ammalati"; ``y_two`` is drawn on a twin axis labelled
    "Totale deceduti". The process locale is switched to ``it_IT.UTF-8``.

    Args:
        x: Dates shared by both series.
        y_one: Values for the left axis.
        y_two: Values for the twin (right) axis.
        region_name: Region shown in the chart title.

    Returns:
        ``ResultOk`` wrapping the ``plt`` module, or ``ResultKo`` wrapping the
        exception raised (including an unavailable locale).
    """
    log = logging.getLogger('chart_composite')
    log.info(" >>")
    try:
        locale.setlocale(locale.LC_ALL, 'it_IT.UTF-8')

        fig = plt.figure(figsize=(20, 10))
        gs1 = gridspec.GridSpec(1, 1, hspace=0.2, wspace=0.1, figure=fig)

        # A single subplot: no need for a list of axes and an index.
        ax_main = fig.add_subplot(gs1[0, 0])
        set_axes_common_properties(ax_main, no_grid=False)

        # Thousands separator on the y ticks.
        ax_main.get_yaxis().set_major_formatter(
            mp.ticker.FuncFormatter(
                lambda value, _pos: format(int(value), ',')))

        remove_tick_lines('x', ax_main)
        remove_tick_lines('y', ax_main)

        ax_main.set_xticks(x)
        ax_main.set_xticklabels(x, rotation=80)

        ax_main.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y"))
        ax_main.xaxis.set_minor_formatter(mdates.DateFormatter("%d/%m"))
        ax_main.xaxis.set_major_locator(mdates.DayLocator(interval=7))
        ax_main.set_ylabel("Numero", fontsize=14)
        ax_main.set_xlabel("Data", fontsize=14)
        ax_main.set_title("{reg} - {title} ".format(
            title="Deceduti/Ammalati - totale", reg=region_name),
                          fontsize=18)

        ax_main.scatter(x, y_one, color="#b9290a", s=30, marker='.')
        # '-' instead of 'b-': the colour comes from the keyword; naming it in
        # the format string as well makes matplotlib warn about a redundant
        # definition.
        ln_one = ax_main.plot(x,
                              y_one,
                              '-',
                              linewidth=2,
                              color="#f09352",
                              label="Totale ammalati")

        dec_color = "#8f0013"
        ax_dec = ax_main.twinx()

        remove_tick_lines('y', ax_dec)
        set_axes_common_properties(ax_dec, no_grid=True)
        ax_dec.scatter(x, y_two, color=dec_color, s=30, marker='.')

        ln_two = ax_dec.plot(x,
                             y_two,
                             '-',
                             linewidth=2,
                             color=dec_color,
                             label="Totale deceduti")

        ax_dec.set_ylabel("Totale deceduti", fontsize=14)
        ax_dec.yaxis.label.set_color(dec_color)
        ax_dec.tick_params(axis='y', colors=dec_color)

        # One legend for the lines of both axes.
        lns = ln_one + ln_two
        labs = [line.get_label() for line in lns]
        ax_main.legend(lns, labs, loc='upper left')

    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(plt)
예제 #25
0
def refactor_region_df(df: pd.DataFrame,
                       report_date: dt.datetime,
                       pdf_version: str = "v1") -> ResultValue:
    """Give the regions table its standard column names and stamp it.

    The leading columns of ``df`` are renamed by position according to
    ``pdf_version`` (``"v1"``, ``"v6"`` or ``"v5"``); then ``REPORT DATE`` and
    ``SCHEMA VERSION`` columns are set. ``df`` itself is modified.

    Returns:
        ``ResultOk`` wrapping the same frame, or ``ResultKo`` for an unknown
        version or any exception raised (for instance too few columns).
    """
    logger = logging.getLogger('refactor_region_df')
    logger.info(" ({ver} - {dt}) >>".format(dt=report_date, ver=pdf_version))
    logger.debug("\n{d}".format(d=str(df)))

    # Positional column names for every known layout.
    layouts = (
        ("v1", (
            "Regione",
            "Ricoverati con sintomi",
            "Terapia intensiva",
            "Isolamento domiciliare",
            "Totale attualmente positivi",
            "DIMESSI/GUARITI",
            "DECEDUTI",
            "CASI TOTALI - A",
            "INCREMENTO CASI TOTALI (rispetto al giorno precedente)",
            "Casi identificatidal sospettodiagnostico",
            "Casi identificatida attività discreening",
            "CASI TOTALI - B",
            "Totale casi testati",
            "Totale tamponi effettuati",
            "INCREMENTO TAMPONI",
        )),
        ("v6", (
            "Regione",
            "Ricoverati con sintomi",
            "Terapia intensiva",
            "Terapia intensiva / Ingressi delgiorno",
            "Isolamento domiciliare",
            "Totale attualmente positivi",
            "DIMESSI/GUARITI",
            "DECEDUTI",
            "CASI TOTALI - A",
            "INCREMENTO CASI TOTALI (rispetto al giorno precedente)",
            "Totale persone testate",
            "Totale tamponi effettuati",
            "INCREMENTO TAMPONI",
        )),
        ("v5", (
            "Regione",
            "Ricoverati con sintomi",
            "Terapia intensiva",
            "Isolamento domiciliare",
            "Totale attualmente positivi",
            "DIMESSI/GUARITI",
            "DECEDUTI",
            "CASI TOTALI - A",
            "INCREMENTO CASI TOTALI (rispetto al giorno precedente)",
            "Totale tamponi effettuati",
            "Casi testati",
        )),
    )

    df_res = None
    try:
        df_res = df
        new_names = None
        for version, names in layouts:
            if pdf_version == version:
                new_names = names
                break

        if new_names is None:
            err = Exception("Unknown pdf version: {pv}".format(pv=pdf_version))
            logger.error("Error - {ex}".format(ex=err))
            return ResultKo(err)

        # Indexing the columns one by one raises IndexError when the table has
        # fewer columns than the layout expects.
        renaming = {
            df_res.columns[position]: name
            for position, name in enumerate(new_names)
        }
        df_res.rename(columns=renaming, inplace=True)

        df_res["REPORT DATE"] = report_date
        df_res["SCHEMA VERSION"] = pdf_version
        logger.debug("\n{d}".format(d=str(df_res)))

    except Exception as err:
        logger.error(" failed - {ex}".format(ex=err))
        return ResultKo(err)

    logger.info(" <<")
    return ResultOk(df_res)
예제 #26
0
def pdf_to_dataframe(pdf_file_name: str) -> ResultValue:
    """Extract the per-region table and the report date from a report PDF.

    The PDF is converted by tabula into a CSV file written next to it (same
    path, ``.csv`` extension). That CSV is then scanned line by line:

    * rows from the one starting with "Lombardia" up to (but excluding) the
      one starting with "TOTALE" are cleaned up and collected as table rows;
    * a line containing 'Aggiornamento casi Covid-19' or 'AGGIORNAMENTO '
      provides the report date.

    Args:
        pdf_file_name: Path of the PDF file to parse.

    Returns:
        ``ResultOk((df, report_date))`` where ``df`` is a DataFrame built by
        splitting every collected row on commas and ``report_date`` is the
        parsed date (the placeholder 1964-08-03 when no date line is found);
        ``ResultKo(exception)`` when the conversion, the parsing or the date
        translation fails.
    """
    log = logging.getLogger('pdf_to_dataframe')
    log.info(" ({fn}) >>".format(fn=pdf_file_name))
    # Placeholder returned unchanged when the document carries no date line.
    report_date: dt.datetime = dt.datetime(1964, 8, 3, 0, 0)
    try:
        csv_file = os.path.splitext(pdf_file_name)[0] + ".csv"
        tabula.convert_into(pdf_file_name,
                            csv_file,
                            output_format="csv",
                            pages='all')

        # Turns a blank between two digits into a comma, so that the two
        # numbers end up in separate fields when the row is split below.
        digits_gap = re.compile(r"(\d{1,3}) (\d)")

        list_reg = []
        with open(csv_file, "r") as fh:
            in_table = False
            for line in fh:
                if line.startswith("Lombardia"):
                    in_table = True
                if line.startswith("TOTALE"):
                    in_table = False
                if in_table:
                    # The cleaned text deliberately replaces ``line``: the
                    # date checks below run on the cleaned row, as before.
                    line = line.replace(".", "")
                    line = line.replace("+ ", "")
                    line = digits_gap.sub(r"\1,\2", line)
                    line = line.replace("\n", "")
                    list_reg.append(line)
                if 'Aggiornamento casi Covid-19' in line:
                    parts = line.split(" - ")
                    if len(parts) > 1:
                        report_date_s = parts[0]
                        # startswith() is safe even when the text before the
                        # separator is empty (indexing [0] would raise).
                        if report_date_s.startswith("\""):
                            report_date_s = report_date_s[1:]
                        log.debug(report_date_s)
                        report_date_rv = translate_to_date(
                            report_date_s.split(" "))
                        if report_date_rv.is_in_error():
                            msg = "Error in date translation."
                            log.error(msg)
                            return ResultKo(Exception(msg))
                        report_date = report_date_rv()
                elif 'AGGIORNAMENTO ' in line:
                    parts = line.split(" ")
                    if len(parts) > 1:
                        report_date = dt.datetime.strptime(
                            parts[1], '%d/%m/%Y')
                        log.info("RDate: {rd}".format(rd=report_date))

        df = pd.DataFrame([row.split(",") for row in list_reg])

    except Exception as ex:
        log.error("pdf_to_dataframe failed - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" (report_date={rd}) <<".format(rd=report_date))
    return ResultOk((df, report_date))
예제 #27
0
def chart_single_line(x: pd.Series, y: pd.Series,
                      context: dict) -> ResultValue:
    """Draw a single series as a line with point markers on a new figure.

    Args:
        x: Values for the x axis; they are used as tick positions and the
            axis is formatted with date formatters.
        y: Values plotted against ``x``.
        context: Chart options. ``'region name'`` and ``'title'`` are
            mandatory. ``'dad begin date'`` is optional: when present a
            vertical marker is drawn at that date and, if
            ``'school opening date'`` is present too, a second marker is
            drawn for it.

    Returns:
        ``ResultOk(plt)`` (the pyplot module holding the new figure) on
        success; ``ResultKo(exception)`` when a mandatory field is missing
        or drawing fails.
    """
    log = logging.getLogger('chart_single_line')
    log.info(" >>")
    try:
        # Validate the mandatory fields before any figure is created.
        for field in ('region name', 'title'):
            if context.get(field) is None:
                msg = "Error: {f} field is mandatory.".format(f=field)
                log.error(msg)
                return ResultKo(Exception(msg))
        region_name = context["region name"]
        title = context["title"]

        fig = plt.figure(figsize=(20, 10))
        gs1 = gridspec.GridSpec(1, 1, hspace=0.2, wspace=0.1, figure=fig)
        ax = fig.add_subplot(gs1[0, 0])
        set_axes_common_properties(ax, no_grid=False)

        remove_tick_lines('x', ax)
        remove_tick_lines('y', ax)
        ax.set_xticks(x)
        ax.set_xticklabels(x, rotation=80)

        ax.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y"))
        ax.xaxis.set_minor_formatter(mdates.DateFormatter("%d/%m"))
        ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
        ax.set_ylabel("Numero", fontsize=14)
        ax.set_xlabel("Data", fontsize=14)
        ax.set_title("{reg} - {title} ".format(title=title,
                                               reg=region_name),
                     fontsize=18)

        ax.scatter(x, y, color="#b9290a", s=30, marker='.', label=title)
        # Only the line style goes in the format string: the colour is given
        # by the keyword, and defining it twice makes matplotlib warn.
        ax.plot(x, y, '-', linewidth=2, color="#f09352")

        dad_begin_date = context.get('dad begin date')
        if dad_begin_date is not None:
            # (date, horizontal position in axes coordinates, caption)
            markers = [
                (dad_begin_date, 0.50, 'Inizio dad scuole superiori'),
                (context.get('school opening date'), 0.95,
                 'Riapertura scuole'),
            ]
            for marker_date, text_x, caption in markers:
                # A missing optional date must not break the whole chart.
                if marker_date is None:
                    continue
                ax.axvline(marker_date, color="#048f9e")
                ax.text(text_x,
                        0.25,
                        caption,
                        horizontalalignment='center',
                        verticalalignment='center',
                        transform=ax.transAxes,
                        rotation=90,
                        color="#048f9e",
                        fontsize=12)

        ax.legend(fontsize=12, loc='upper left')
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(plt)
예제 #28
0
def main(args: argparse.Namespace) -> ResultValue:
    """Download the vaccine open-data CSV files and/or draw the delivery chart.

    Args:
        args: Parsed command line. The flags read are
            ``download_vaccinazioni``, ``download_consegne`` and ``chart``.

    Returns:
        The result of the last step executed. After downloads only, this is
        the first failed download if any, otherwise the last successful one.
        ``ResultKo`` is returned when loading a data file for the chart
        fails, when an exception is caught, or when no flag is set (the
        initial ``ResultKo(Exception("Error"))`` is returned untouched).
    """
    log = logging.getLogger('Main')
    log.info(" >>")
    rv: ResultValue = ResultKo(Exception("Error"))
    try:
        today = dt.datetime.now().strftime("%Y%m%d")
        data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                "..", "data")

        # Collect every requested download: with both flags set, each file
        # must be fetched (a single file_name/url pair would keep only the
        # last one).
        downloads = []
        if args.download_vaccinazioni:
            downloads.append((
                "{dt}_vaccinazioni.csv".format(dt=today),
                "https://raw.githubusercontent.com/italia/covid19-opendata-vaccini/master/dati/somministrazioni-vaccini-latest.csv"
            ))
        if args.download_consegne:
            downloads.append((
                "{dt}_vaccini_consegnati.csv".format(dt=today),
                "https://raw.githubusercontent.com/italia/covid19-opendata-vaccini/master/dati/consegne-vaccini-latest.csv"
            ))

        download_failed = False
        for file_name, url in downloads:
            download_rv = download_csv_file(url=url,
                                            data_file=os.path.join(
                                                data_dir, file_name))
            if download_rv.is_in_error():
                log.error("Data download error: {e}".format(e=download_rv()))
                # Keep the first failure so that a later success cannot
                # hide it; the remaining downloads are still attempted.
                if not download_failed:
                    rv = download_rv
                    download_failed = True
            else:
                log.info("Data downloaded.")
                if not download_failed:
                    rv = download_rv

        if args.chart:
            rv = create_dataframe(
                data_file=os.path.join(data_dir, "vaccinazioni.csv"))
            if rv.is_in_error():
                log.error(rv())
                return ResultKo(rv())
            df = rv()

            rv = create_delivered_dataframe(
                data_file=os.path.join(data_dir, "vaccini_consegnati.csv"))
            if rv.is_in_error():
                log.error(rv())
                return ResultKo(rv())
            df_delivered = rv()

            region_name = "Lombardia"
            # NOTE(review): df_region is not plotted yet; it is kept because
            # building it still fails when an expected column is missing.
            df_region = df.loc[df["nome_area"] == region_name, [
                "data_somministrazione", "totali", 'fascia_anagrafica',
                "sesso_maschile", "sesso_femminile", "fornitore", "prima_dose",
                "seconda_dose"
            ]]

            df_delivered_region = df_delivered.loc[
                df_delivered["nome_area"] == region_name,
                ["fornitore", "numero_dosi", "data_consegna"]]

            fig = plt.figure(figsize=(20, 10))
            gs1 = gridspec.GridSpec(1, 1, hspace=0.2, wspace=0.1, figure=fig)
            ax = fig.add_subplot(gs1[0, 0])

            rv = plot_delivered_vaccines_quantity(df_delivered_region, ax=ax)
            if rv.is_ok():
                plt.savefig(os.path.join(os.sep, "tmp", "vaccini_fig.png"),
                            bbox_inches='tight',
                            pad_inches=0.2)

    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        rv = ResultKo(ex)
    log.info(" ({rv}) <<".format(rv=rv.is_ok()))
    return rv