Python to_dataframe 예제들, scrape.entorb.to_dataframe Python 예제들

예제 #1

0

파일 보기

def plot_rank(title='Inzidenz',
              func=weekly_r,
              states=DE_STATE_NAMES,
              population=DE_STATE_POPULATION):
    """
    Plot, per state, the 30-day rolling mean of its rank among all states.

    For every state the 'Cases' column of ``func(dataframe, population=...)``
    is collected into one frame (one column per state); the columns are
    ranked against each other row by row and the ranks are smoothed.

    Args:
        title: Super title of the resulting figure.
        func: Callable taking a dataframe and a ``population`` keyword and
            returning a dataframe with a 'Cases' column.
        states: Mapping of ISO code to state name.
        population: Mapping of state name to population.

    Return:
        Figure
    """
    global cached_dfs

    # The grid is fixed to 4x4, so at most 16 states can be drawn.
    fig, axes = plt.subplots(nrows=4, ncols=4, sharex=True, sharey=True)

    de = None
    for iso, state in states.items():
        if iso not in cached_dfs:
            cached_dfs[iso] = entorb.to_dataframe(iso)
        result = func(cached_dfs[iso], population=population[state])
        if de is None:
            # Keep ONLY the 'Cases' column. The previous code called
            # DataFrame.drop() without using its return value, so all other
            # columns of the first state leaked into the ranking below.
            de = result[['Cases']].rename(columns={'Cases': state})
        else:
            de[state] = result['Cases']
    rnk = de.rank(axis=1).rolling('30D').mean()

    for ax_idx, state in enumerate(states.values()):
        rnk[state].plot(kind='line',
                        ax=axes.flat[ax_idx],
                        title=state,
                        legend=None)

    fig.suptitle(title)
    fig.set_size_inches(16, 16)
    fig.tight_layout()
    return fig

예제 #2

0

파일 보기

def plot_press_chronic():
    """
    Plot the case rates of 'DE-total' next to the events listed in
    'data/chronic_de.tsv'.

    The event dates ('Datum') are used as tick positions and the event
    descriptions ('Ereignis') as tick labels.

    Return:
        Figure
    """
    global cached_dfs
    # Store a freshly downloaded frame in the shared cache so that later
    # calls do not fetch 'DE-total' again.
    if 'DE-total' not in cached_dfs:
        cached_dfs['DE-total'] = entorb.to_dataframe('DE-total')
    de = cached_dfs['DE-total']

    rs1 = polynomial_r(de)
    rs2 = weekly_r(de)
    news = pd.read_csv('data/chronic_de.tsv',
                       sep="\\t",
                       usecols=['Datum', 'Ereignis'],
                       engine='python')
    news['Datum'] = pd.to_datetime(news['Datum'], format='%Y-%m-%d')
    news = news.set_index('Datum')

    fig, ax = plt.subplots()
    rs1['Cases'].transpose().plot(ax=ax, label='Logistisch')
    # NOTE(review): pandas documents `secondary_y`, not `secondary_x`, as a
    # plot keyword - confirm that this keyword is accepted as intended.
    rs2['Cases'].transpose().plot(ax=ax,
                                  secondary_x=True,
                                  label='Wochenfälle je 100 000 Ew.')
    ax.set_title("Fall-Rate. COVID-19-Chronik der FAZ")
    ax.set_yticks(news.index)
    # Upper limit first: the axis runs from the newest event (plus four days
    # of padding) back to the first date of the rate series.
    ax.set_ylim(news.index.max() + timedelta(days=4), rs1['Cases'].index.min())
    ax.set_yticklabels(news['Ereignis'])
    ax.grid()
    ax.set_ylabel('')

    plt.legend(loc='lower right')

    fig.set_size_inches(9, 16)

    return fig

예제 #3

0

파일 보기

def plot_weekly_r(col='Cases', ncols=4):
    """
    Plot of <col> per week, one chart per entry of DE_STATE_NAMES.

    Args:
        col: What to plot. 'Cases' by default.
        ncols: Columns of charts of resulting figure. The number of rows is
            derived from it so that every area gets its own chart.

    Return:
        Figure
    """
    global cached_dfs
    areas = sorted(DE_STATE_NAMES)
    # Ceiling division without importing math; at least one row is needed.
    nrows = max(1, -(-len(areas) // ncols))
    # squeeze=False keeps `axes` a 2-D array even for a single row/column,
    # so `axes.flat` below always works.
    fig, axes = plt.subplots(nrows=nrows,
                             ncols=ncols,
                             sharex=True,
                             sharey=True,
                             squeeze=False)
    for ax, area in zip(axes.flat, areas):
        if area not in cached_dfs:
            cached_dfs[area] = entorb.to_dataframe(area)
        de = cached_dfs[area]
        rs = weekly_r(de, DE_STATE_POPULATION[DE_STATE_NAMES[area]])
        rs[col].plot(ax=ax, title=DE_STATE_NAMES[area])
    fig.suptitle("Weekly new cases")
    fig.set_size_inches(16, 16)
    fig.tight_layout()
    return fig

예제 #4

0

파일 보기

def plot_r(col='Cases', population=DE_POPULATION):
    """
    Plot polynomial_r and rki_r of <col> for every entry of DE_STATE_NAMES.

    Args:
        col: What to plot. 'Cases' by default.
        population: Looked up by state name (``population[state_name]``).
            NOTE(review): other code in this listing passes DE_POPULATION
            as a single number - confirm that this default supports lookup
            by state name.

    Return:
        Tuple of the Figure, a list with the last polynomial_r value per
        area and a list with the last rki_r value per area (both ordered by
        sorted area code).
    """
    global cached_dfs
    lasts = []
    lasts_rki = []
    areas = sorted(DE_STATE_NAMES)
    fig, axes = plt.subplots(nrows=4, ncols=4, sharex=True, sharey=True)
    for ax, area in zip(axes.flat, areas):
        # Cache the raw frame and smooth a local copy. Previously the
        # smoothed frame was written to the shared cache on a miss while a
        # cache hit was used unsmoothed, so results depended on call order.
        if area not in cached_dfs:
            cached_dfs[area] = entorb.to_dataframe(area)
        de = cached_dfs[area].rolling('7D').mean()

        state = DE_STATE_NAMES[area]
        rs = polynomial_r(de, population[state])
        lasts.append(rs[col].tail(1).values[0])

        ax = rs[col].plot(ax=ax,
                          title="%s (%d Ew.)" % (state, population[state]))

        rs = rki_r(de)
        lasts_rki.append(rs[col].tail(1).values[0])
        rs[col].plot(ax=ax)

    fig.set_size_inches(16, 16)
    fig.tight_layout()
    return fig, lasts, lasts_rki

예제 #5

0

파일 보기

파일: source_comparison.py 프로젝트: pschwede/covid19plots

def plot_source_deltas():
    """
    Compare the 'DE' data of entorb, risklayer and rki in six stacked charts.

    Charts 1-2 show entorb minus risklayer, charts 3-4 entorb minus rki
    (each for 'Cases' and 'Cases_New', as bars); charts 5-6 overlay the raw
    'Cases' and 'Cases_New' series of all three sources.

    Return:
        Figure
    """
    columns = ['Cases', 'Cases_New']
    fig, axes = plt.subplots(nrows=6)

    entorb_df = entorb.to_dataframe(nation='DE')
    risklayer_df = risklayer.to_dataframe(nation='DE')
    for axis, column in zip(axes.flat[:2], columns):
        delta = entorb_df[column] - risklayer_df[column]
        delta.plot(kind='bar',
                   label="%s, delta entorb - risklayer" % column,
                   sharex=True,
                   ax=axis)
        axis.legend()

    rki_df = rki.to_dataframe(nation='DE')
    for axis, column in zip(axes.flat[2:4], columns):
        delta = entorb_df[column] - rki_df[column]
        delta.plot(kind='bar',
                   label="%s, delta entorb - rki" % column,
                   sharex=True,
                   ax=axis)
        axis.legend()

    # Overlay every source on the last two charts, one chart per column.
    sources = ((entorb, entorb_df), (rki, rki_df), (risklayer, risklayer_df))
    for module, frame in sources:
        for axis, column in zip(axes.flat[4:], columns):
            frame[column].plot(label="%s, %s" % (column, module.__name__),
                               sharex=True,
                               ax=axis)
            axis.legend()

    fig.set_size_inches(16, 9)
    return fig

예제 #6

0

파일 보기

def main():
    """
    Write the projection figures to the files named on the command line.

    argv[1] receives the 'DE-total' projection, argv[2] the projections of
    all entries of DE_STATE_NAMES and the optional argv[3] the projection
    for 'US'. Exits with status 2 if fewer than two outfiles are given.
    """
    import sys
    if len(sys.argv) < 3:
        print("USAGE: %s OUTFILEprojection1 OUTFILEfuture_unld2 [OUTFILE_us]" %
              sys.argv[0])
        sys.exit(2)
    de = entorb.to_dataframe('DE-total')
    with plt.style.context('ggplot'):
        plot_projection(de, DE_POPULATION).savefig(sys.argv[1],
                                                   bbox_inches='tight')
        states = list(DE_STATE_NAMES)
        plot_projections(
            states,
            [entorb.to_dataframe(s) for s in states],
            [DE_STATE_POPULATION[DE_STATE_NAMES[s]] for s in states],
        ).savefig(sys.argv[2], bbox_inches='tight')
        # The third outfile is optional: the argument check above only
        # guarantees two, so reading argv[3] unconditionally raised
        # IndexError after the first two figures had been written.
        if len(sys.argv) > 3:
            plot_projection(de=entorb.to_dataframe(nation='US'),
                            population=329e6).savefig(sys.argv[3],
                                                      bbox_inches='tight')

예제 #7

0

파일 보기

파일: correlate.py 프로젝트: pschwede/covid19plots

def plot_correlation():
    """
    Scatter-plot 'Deaths_Per_Million' against per-country Wikidata figures.

    Country figures are queried from Wikidata, a few derived ratios are
    added, and the last 'Deaths_Per_Million' value of
    ``entorb.to_dataframe(nation=<code>)`` is attached per country (None if
    it cannot be obtained). One scatter chart is drawn per figure.

    Return:
        Figure
    """
    # https://query.wikidata.org/
    dictlist = return_sparql_query_results("""
    SELECT DISTINCT ?code ?population ?area ?ppp ?ngdp ?growth ?totrsv ?hdi ?medinc ?literacy ?life_expectancy ?fertility_rate
    {
      ?country wdt:P31 wd:Q3624078 ;
               wdt:P297 ?code ;
               wdt:P2046 ?area ;
               wdt:P1082 ?population .
        OPTIONAL {
               ?country wdt:P4010 ?ppp .
               ?country wdt:P2131 ?ngdp .
               ?country wdt:P2219 ?growth .
               ?country wdt:P2134 ?totrsv .
               ?country wdt:P1081 ?hdi .
               ?country wdt:P3529 ?medinc .
               ?country wdt:P6897 ?literacy .
               ?country wdt:P2250 ?life_expectancy .
               ?country wdt:P4841 ?fertility_rate .
        }
    }
    """)['results']['bindings']

    keys = [
        'code', 'population', 'area', 'ppp', 'ngdp', 'growth', 'totrsv',
        'hdi', 'medinc', 'literacy', 'life_expectancy', 'fertility_rate',
    ]

    def _cell(row, key):
        # 'code' stays a string; every other binding is numeric and may be
        # missing from a row (OPTIONAL part of the query).
        if key == 'code':  # was `is 'code'`: identity test on a str literal
            return row[key]['value']
        if key in row:
            return float(row[key]['value'])
        return None

    df = pd.DataFrame({k: [_cell(x, k) for x in dictlist]
                       for k in keys}).set_index('code')
    df['density'] = df['population'] / df['area']
    df['ngdp/p'] = df['ngdp'] / df['population']
    df['ppp/p'] = df['ppp'] / df['population']
    df['growth/p'] = df['growth'] / df['population']
    df['totrsv/p'] = df['totrsv'] / df['population']
    df['hdi/p'] = df['hdi'] / df['population']

    cols = ['Deaths_Per_Million']
    dict_entorb = {c: [] for c in cols}
    for area in df.index:
        # Best effort: a country without usable data gets None. Collect all
        # values first and append afterwards so the per-column lists can
        # never get out of step, and do not swallow KeyboardInterrupt or
        # SystemExit as the former bare `except:` did.
        try:
            latest = entorb.to_dataframe(nation=area)
            values = [latest[col].values[-1] for col in cols]
        except Exception:
            values = [None] * len(cols)
        for col, value in zip(cols, values):
            dict_entorb[col].append(value)
    for col in dict_entorb:
        df[col] = dict_entorb[col]

    ncols = int((len(df.columns) - 1) / 4 + 1)
    fig, axes = plt.subplots(ncols=ncols, nrows=4)
    for ax, col in zip(axes.flat, [c for c in df.columns if c not in cols]):
        df.plot(kind='scatter',
                y='Deaths_Per_Million',
                logy=True,
                x=col,
                sharey=False,
                ax=ax)
    fig.set_size_inches(16, 4 * ncols)
    return fig

예제 #8

0

파일 보기

def plot_rki_and_logistic_total(state='DE-total'):
    """
    Plot new cases together with the logistic and RKI rates of one area.

    New cases go on a secondary y-axis; both rates share the primary
    y-axis. Every quantity is drawn twice: as a 7-day rolling mean (solid)
    and as the plain series (dotted, same colour).

    Args:
        state: Code passed to entorb.to_dataframe, 'DE-total' by default.

    Return:
        Figure
    """
    global cached_dfs
    if state in cached_dfs:
        frame = cached_dfs[state]
    else:
        frame = entorb.to_dataframe(state)
        cached_dfs[state] = frame

    fig, ax = plt.subplots()
    ax2 = ax.twinx()

    # New cases on the secondary axis: smoothed first, raw in same colour.
    smoothed = ax2.plot(frame.index,
                        frame['Cases_New'].rolling('7D').mean(),
                        color='k',
                        label='New Cases rolling week')
    ax2.plot(frame.index,
             frame['Cases_New'],
             linestyle=':',
             color=smoothed[0].get_color(),
             label='New Cases')

    # Both rates on the primary axis, logistic before RKI.
    rate_specs = (
        (polynomial_r, 'Logistic rate rolling week', 'Logistic rate'),
        (rki_r, 'RKI rate rolling week', 'RKI rate'),
    )
    for rate_func, smoothed_label, raw_label in rate_specs:
        rate = rate_func(frame)['Cases']
        smoothed = ax.plot(frame.index,
                           rate.rolling('7D').mean(),
                           label=smoothed_label)
        ax.plot(frame.index,
                rate,
                linestyle=':',
                color=smoothed[0].get_color(),
                label=raw_label)

    plt.legend(handles=ax.lines + ax2.lines)
    ax.axhline(1.0, color='g', alpha=0.5)
    fig.set_size_inches(16, 9)
    fig.tight_layout()
    return fig

예제 #9

0

파일 보기

def plot_rki_and_logistic(col='Cases',
                          ncols=4,
                          population=DE_STATE_POPULATION):
    """
    Plot of reproduction of <col>, one chart per entry of DE_STATE_NAMES.

    Args:
        col: What to plot. 'Cases' by default.
        ncols: Columns of charts of resulting figure. The number of rows is
            derived from it so that every area gets its own chart.
        population: Mapping of state name to population.

    Return:
        Tuple of a Figure and a DataFrame indexed by 'area' with the last
        'logistic', 'rki' and 'weekly' value of every area.
    """
    global cached_dfs
    areas = sorted(DE_STATE_NAMES)
    lasts = {'area': areas, 'logistic': [], 'rki': [], 'weekly': []}
    # Ceiling division without importing math; at least one row is needed.
    nrows = max(1, -(-len(areas) // ncols))
    # squeeze=False keeps `axes` a 2-D array even for a single row/column.
    fig, axes = plt.subplots(nrows=nrows,
                             ncols=ncols,
                             sharex=True,
                             sharey=True,
                             squeeze=False)
    for ax, area in zip(axes.flat, areas):
        # The cache holds the raw frame; smooth once per area.
        if area not in cached_dfs:
            cached_dfs[area] = entorb.to_dataframe(area)
        de = cached_dfs[area].rolling('7D').mean()

        state = DE_STATE_NAMES[area]

        rs = polynomial_r(de, population[state])
        lasts['logistic'].append(rs[col].values[-1])
        rs[col].plot(ax=ax,
                     label="logistic",
                     title="%s (%d Ew.)" % (state, population[state]))

        rs = rki_r(de)
        lasts['rki'].append(rs[col].values[-1])
        rs[col].plot(ax=ax, ylim=(1, 2), label="rki", sharex=True, sharey=True)

        # The weekly rate is only reported in the returned table, not drawn.
        rs = weekly_r(de, population[state])
        lasts['weekly'].append(rs[col].values[-1])

    fig.set_size_inches(16, 16)
    return fig, pd.DataFrame(lasts).set_index('area')

예제 #10

0

파일 보기

파일: divi.py 프로젝트: pschwede/covid19plots

def plot_history(dataframe, fname):
    """
    Save one figure per federal state showing new deaths and availability.

    Args:
        dataframe: Frame with a 'Stand' column (used as index) and a
            'Bundesland' column holding the keys of
            federal_state_translation; the remaining columns are averaged
            per day and drawn on a secondary y-axis.
        fname: %-format string for the output path; it is formatted with a
            mapping that contains the key 'state'.
    """
    availability = dict()
    deaths = dict()

    df = dataframe.set_index('Stand').bfill()

    for federal_state in federal_state_translation:
        availability[federal_state] = df[df['Bundesland'] ==
                                         federal_state].resample('D').mean()
        deaths[federal_state] = entorb.to_dataframe(
            federal_state)['Deaths_New']
        deaths[federal_state].name = 'Deaths'

    for federal_state in sorted(federal_state_translation):
        fig, ax = plt.subplots()

        ax = deaths[federal_state].plot(kind='line',
                                        ax=ax,
                                        color='grey',
                                        linestyle=':',
                                        marker='+',
                                        legend=True,
                                        figsize=(16, 9))
        ax.set_xlabel('Deaths')

        # With secondary_y=True the returned axes is the secondary one, so
        # the ticks below label the availability scale.
        ax = availability[federal_state].plot(
            kind='line', ax=ax, secondary_y=True,
            marker='')  # drawstyle=steps-post
        ax.set_yticks([
            0.0,  # * num_clinics[federal_state],
            0.5,  # * num_clinics[federal_state],
            1.0,  # * num_clinics[federal_state],
        ])
        ax.set_yticklabels(["available", "limited", "unavailable"])
        ax.set_title("%s" % federal_state_translation[federal_state])

        fig.savefig(fname % {'state': federal_state})
        # Release the figure once it is on disk; otherwise one open figure
        # per state accumulates in pyplot's registry.
        plt.close(fig)

예제 #11

0

파일 보기

def main():
    import sys
    area = 'DE-total'
    de = entorb.to_dataframe(area)