Ejemplos de AutoSizeParams en Python, ejemplos de altair.AutoSizeParams en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: functions.py Proyecto: suatakbulut/usedcars-capstone

    def barChart(self, Model):
        """Build a faceted bar chart of predicted prices and return it as JSON.

        A grid with one row per (year, condition) pair is created for the
        years around ``self.year`` (two before through two after, capped so
        that no year later than the current calendar year is included) and
        four condition levels.  Every other column found in ``self.df`` is
        filled with the value stored under label ``0`` of that column, prices
        are predicted with ``Model.model.predict`` and the result is drawn as
        one bar panel per year.

        Parameters
        ----------
        Model
            Object whose ``model`` attribute exposes ``predict(df)``.

        Returns
        -------
        The value of ``to_json(indent=None)`` for the resulting chart.
        """
        condition_levels = ('fair', 'good', 'excellent', 'like new')
        stop_year = min(self.year + 3, datetime.now().year + 1)
        year_span = range(self.year - 2, stop_year)

        grid = pd.DataFrame(list(product(year_span, condition_levels)),
                            columns=['year', 'condition'])

        # Copy the remaining columns from the stored frame into every row.
        for name in set(self.df.columns).difference(set(grid.columns)):
            grid[name] = self.df[name][0]

        # Column order handed to the model.
        feature_order = [
            'state', 'year', 'manufacturer', 'model', 'odometer',
            'transmission', 'condition', 'BaseMSRP'
        ]
        grid = grid[feature_order]
        grid['price'] = Model.model.predict(grid)

        x_channel = alt.X('condition:O', sort=list(condition_levels))
        color_channel = alt.Color('condition:O',
                                  sort=list(condition_levels),
                                  scale=alt.Scale(scheme='purplegreen'))
        sizing = alt.AutoSizeParams(type='fit', contains='padding')

        chart = alt.Chart(grid).mark_bar().encode(x=x_channel,
                                                  y='price:Q',
                                                  color=color_channel,
                                                  column='year:N')
        chart = chart.properties(height=240,
                                 width=140,
                                 padding=10,
                                 autosize=sizing)

        return chart.to_json(indent=None)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: legacyfigures.py Proyecto: ISI-MIP/isipedia-library

def _rankingmap_altair(countries,
                       ranking,
                       x,
                       scenario=None,
                       method='number',
                       title='',
                       label=''):
    """Build a world map coloured by a country ranking.

    Adapted from https://altair-viz.github.io/gallery/index.html

    ``countries`` is wrapped in ``alt.Data`` and drawn with ``mark_geoshape``.
    Each shape is joined, through its ``properties.ISIPEDIA`` field, to the
    ``Code`` column of the table returned by
    ``get_ranking_data(countries, ranking, x, scenario, method)``.

    Parameters
    ----------
    countries
        Values handed to ``alt.Data`` as the geographic source.
    ranking
        Object providing ``plot_type`` and ``plot_title`` (and
        ``plot_unit_x`` when ``plot_type`` is ``'indicator_vs_temperature'``).
    x, scenario
        Forwarded to ``get_ranking_data``.
    method : {'number', 'value'}
        ``'number'`` colours by ``Rank`` (ascending), ``'value'`` colours by
        ``Value`` (descending).
    title, label
        Accepted but not used by this function body.

    Raises
    ------
    ValueError
        If ``method`` is neither ``'number'`` nor ``'value'``.
    """
    import pandas as pd
    import altair as alt

    if method not in ('number', 'value'):
        raise ValueError('method must be "number" or "value"')

    source = alt.Data(values=countries)

    scenario_names = {
        'rcp26': 'RCP 2.6',
        'rcp45': 'RCP 4.5',
        'rcp60': 'RCP 6',
        'rcp85': 'RCP 8.5'
    }
    if ranking.plot_type == 'indicator_vs_temperature':
        details = 'warming level: {} {}'.format(x, ranking.plot_unit_x)
    else:
        details = 'period: {}, scenario: {}'.format(
            x, scenario_names.get(scenario, scenario))
    # Computed as in the original code, but the chart below uses
    # ranking.plot_title instead of this value.
    default_title = getattr(ranking, 'plot_label_y', '') + '\n' + details

    ranking_data = get_ranking_data(countries, ranking, x, scenario, method)

    if method == 'number':
        fill = alt.Color("Rank:Q", sort='ascending')
    else:
        fill = alt.Color("Value:Q", sort='descending')

    tooltip_fields = [
        "label:N", "unit:N", "Country:N", "Code:N", "Value:Q", "Rank:Q"
    ]
    lookup_table = alt.LookupData(ranking_data, 'Code',
                                  ranking_data.columns.tolist())

    chart = alt.Chart(source).mark_geoshape().encode(color=fill,
                                                     tooltip=tooltip_fields)
    chart = chart.transform_lookup(lookup='properties.ISIPEDIA',
                                   from_=lookup_table)
    chart = chart.project('naturalEarth1')
    chart = chart.properties(
        width=800,
        autosize=alt.AutoSizeParams(contains="padding", type="fit-x"),
        title=ranking.plot_title)

    # Shared look: background colour and font sizes.
    chart = chart.configure(background='#F1F4F4')
    chart = chart.configure_title(fontSize=16)
    chart = chart.configure_axis(labelFontSize=14, titleFontSize=16)
    chart = chart.configure_legend(titleFontSize=14, labelFontSize=14)
    chart = chart.configure_mark(fontSize=14)

    return chart

Ejemplo n.º 3

0

Mostrar archivo

Archivo: legacyfigures.py Proyecto: ISI-MIP/isipedia-library

def _countrymap_altair(mapdata,
                       countrymasksnc,
                       jsfile,
                       x=None,
                       scenario=None,
                       climate=None,
                       impact=None,
                       title='',
                       label=''):
    """Draw the gridded map for ``jsfile.area`` as an Altair rectangle chart.

    The map is fetched with ``mapdata.get`` and passed through the bounds
    object returned by ``mapdata.bounds(area)``.  Grid cells selected by a
    boolean mask are flattened into a ``lon`` / ``lat`` / ``z`` table and
    drawn with ``mark_rect``.

    Parameters
    ----------
    mapdata
        Object providing ``bounds(area)`` and
        ``get(name, x, scenario, climate, impact)``.
    countrymasksnc
        Object providing ``variables`` and item access for ``'lon'``,
        ``'lat'`` and ``'m_<area>'`` entries.
    jsfile
        Object providing ``area``, ``filename``, ``plot_title``,
        ``plot_label_y`` and ``plot_unit_y``.
    x, scenario, climate, impact
        Forwarded to ``mapdata.get``.
    title, label
        Accepted but not used by this function body.

    Returns
    -------
    The configured, interactive chart.
    """
    import altair as alt
    import numpy as np
    import pandas as pd

    area = jsfile.area

    # Variable name: file stem, minus a trailing "<sep><area>" when present.
    name = os.path.splitext(os.path.basename(jsfile.filename))[0]
    if name.endswith(area):
        name = name[:-(len(area) + 1)]

    bnds = mapdata.bounds(area)
    worldmap = mapdata.get(name, x, scenario, climate, impact)
    localmap = bnds.extract(worldmap)

    mask_key = 'm_' + area
    if mask_key in countrymasksnc.variables:
        mask = bnds.extract(countrymasksnc[mask_key]) > 0
    elif area == 'world':
        # Union of every per-country mask.
        mask = np.zeros_like(worldmap, dtype=bool)
        for key in countrymasksnc.variables:
            if key.startswith('m_'):
                mask[countrymasksnc[key][:] > 0] = True
    else:
        # NOTE(review): this mask has the shape of worldmap, while the data
        # it indexes below has the shape of localmap -- confirm they agree.
        mask = np.ones_like(worldmap, dtype=bool)

    # Reverse the first axis of both arrays.
    localmap = localmap[::-1]
    mask = mask[::-1]

    # Unpacking kept from the original code; the two sizes are not used below.
    n_rows, n_cols = localmap.shape

    i_left, i_right, i_bottom, i_top = bnds.indices
    lon_values = countrymasksnc['lon'][i_left:i_right + 1]
    lat_values = countrymasksnc['lat'][i_top:i_bottom + 1][::-1]
    lon_grid, lat_grid = np.meshgrid(lon_values, lat_values)

    west, east, south, north = bnds.extent
    mid_latitude = np.deg2rad((north + south) / 2)
    aspect = (north - south) / (east - west) / np.cos(mid_latitude)
    print(jsfile.area, 'aspect', aspect)

    # Convert this grid to columnar data expected by Altair
    source = pd.DataFrame({
        'lon': lon_grid[mask].round(2),
        'lat': lat_grid[mask].round(2),
        'z': localmap[mask]
    })

    value_tooltip = alt.Tooltip('z:Q',
                                title='{} ({})'.format(jsfile.plot_label_y,
                                                       jsfile.plot_unit_y))

    chart = alt.Chart(source).mark_rect().encode(
        x='lon:O',
        y=alt.Y('lat:O', sort='descending'),
        color=alt.Color('z:Q', title=''),
        tooltip=[value_tooltip, 'lon:Q', 'lat:Q'])
    chart = chart.properties(
        title=jsfile.plot_title,
        width=800,
        height=int(800 * aspect),
        autosize=alt.AutoSizeParams(contains="padding", type="fit-x"),
    )

    # Shared look: background colour, fonts and font sizes.
    chart = chart.configure(background='#F1F4F4')
    chart = chart.configure_header(
        titleFont="IBM Plex Sans",
        titleFontSize=20,
        labelFont="IBM Plex Sans",
        labelFontSize=18,
    )
    chart = chart.configure_title(fontSize=16)
    chart = chart.configure_axis(labelFontSize=14, titleFontSize=16)
    chart = chart.configure_legend(titleFontSize=14, labelFontSize=14)
    chart = chart.configure_mark(fontSize=14)

    return chart.interactive()

Ejemplo n.º 4

0

Mostrar archivo

Archivo: legacyfigures.py Proyecto: ISI-MIP/isipedia-library

def _lineplot_altair_temp_advanced(data,
                                   x=None,
                                   scenario=None,
                                   climate=None,
                                   impact=None,
                                   shading=False,
                                   title='',
                                   xlabel='',
                                   ylabel=''):
    """Build the interactive per-model line plot.

    Every line returned by ``data.filter_lines()`` is stacked into one table.
    Rows whose ``climate`` is ``"median"`` are excluded from the chart.  The
    chart layers a min/max band per climate model, the individual lines,
    points, and two hover labels (model name and percent value).  Clicking a
    legend entry toggles the visibility of that climate model.

    Parameters
    ----------
    data
        Object providing ``filter_lines()``, ``x``, ``plot_title``,
        ``plot_label_x``, ``plot_unit_x`` and ``plot_unit_y``.
    title : str
        Chart title; ``data.plot_title`` is used when empty.
    xlabel : str
        X-axis title; ``"<plot_label_x> (<plot_unit_x>)"`` is used when empty.
    x, scenario, climate, impact, shading, ylabel
        Accepted but not used by this function body.

    Returns
    -------
    The result of ``configure_chart(chart).interactive()``.
    """
    import pandas as pd
    import altair as alt

    df = pd.concat([pd.DataFrame(line) for line in data.filter_lines()])

    # Label shown when hovering over a point.
    df["model"] = df.climate + " / " + df.impact

    # Divide by 100 so we can percent-format in the plot
    df.y = df.y / 100

    # Legend-bound selection: toggles which climate models are visible.
    selection_climate = alt.selection_multi(fields=['climate'], bind='legend')

    # Hover selection: picks the point nearest to the mouse.
    nearest = alt.selection(type='single',
                            nearest=True,
                            on='mouseover',
                            fields=["x", 'y'],
                            empty='none')

    base = alt.Chart(df[(df.climate != "median")])

    color = alt.Color('climate',
                      scale=alt.Scale(scheme="dark2"),
                      title='Climate Model')

    # Band between the smallest and largest y for each climate model.
    area = base.mark_area(opacity=0.3).encode(
        x=alt.X("x", scale=alt.Scale(domain=[0, df['x'].max()])),
        color=color,
        y=alt.Y(field="y",
                type="quantitative",
                axis=alt.Axis(format='%'),
                aggregate="min"),
        y2=alt.Y2(field="y", aggregate="max"),
        opacity=alt.condition(selection_climate, alt.value(0.3), alt.value(0)),
    ).add_selection(selection_climate)

    lines = base.mark_line().encode(
        x=alt.X("x"),
        y=alt.Y("y", axis=alt.Axis(format='%')),
        detail=["climate", "impact"],
        color=color,
        opacity=alt.condition(selection_climate, alt.value(0.3), alt.value(0)),
        size=alt.condition("datum.impact == 'median'", alt.value(5),
                           alt.value(1)),
    )

    points = base.mark_point().encode(
        x=alt.X("x",
                axis=alt.Axis(
                    title=xlabel
                    or '{} ({})'.format(data.plot_label_x, data.plot_unit_x),
                    values=data.x)),
        y=alt.Y("y", axis=alt.Axis(title=data.plot_unit_y, format='%')),
        detail=["climate", "impact"],
        color=color,
        opacity=alt.condition(selection_climate, alt.value(0.3), alt.value(0)),
        size=alt.value(12))

    # Hover labels: model name above the point, percent value below it.
    text_model = points.mark_text(align='left', dx=-5, dy=-6).encode(
        text=alt.condition(nearest, "model", alt.value(' ')),
        opacity=alt.condition(selection_climate, alt.value(1), alt.value(0)),
        color=alt.value("black")).add_selection(nearest)

    text_pct = points.mark_text(align='left', dx=-5, dy=6).encode(
        text=alt.condition(nearest, "y", alt.value(' '), format=".2p"),
        opacity=alt.condition(selection_climate, alt.value(1), alt.value(0)),
        color=alt.value("black"))

    chart = (area + lines + points + text_model + text_pct).properties(
        # Honour an explicit title, as the sibling line-plot functions do.
        title=title or data.plot_title,
        width=800,
        autosize=alt.AutoSizeParams(contains="padding", type="fit-x"),
    )

    return configure_chart(chart).interactive()

Ejemplo n.º 5

0

Mostrar archivo

Archivo: legacyfigures.py Proyecto: ISI-MIP/isipedia-library

def _lineplot_altair_temp(data,
                          x=None,
                          scenario=None,
                          climate=None,
                          impact=None,
                          shading=False,
                          title='',
                          xlabel='',
                          ylabel=''):
    """Plot the median line with a min/max band for one scenario.

    The table from ``data.to_pandas()`` is restricted to ``scenario``.  Its
    column-wise minimum and maximum form the band, and the
    ``('median', 'median')`` entry forms the line.  All values are divided by
    100 so that they can be shown with a percent format.

    Parameters
    ----------
    data
        Object providing ``to_pandas()``, ``x``, ``plot_title``,
        ``plot_label_x``, ``plot_unit_x`` and ``plot_unit_y``.
    scenario
        Label used to select rows of ``data.to_pandas()`` via ``.loc``.
    title, xlabel, ylabel : str
        Overrides for the chart title and axis titles; when empty, values
        derived from ``data`` are used.
    x, climate, impact, shading
        Accepted but not used by this function body.

    Returns
    -------
    The result of ``configure_chart(chart).interactive()``.
    """
    # median data
    scenario_table = data.to_pandas().loc[scenario]
    band = pd.DataFrame({
        'lower': scenario_table.min(axis=0) / 100,
        'upper': scenario_table.max(axis=0) / 100,
        'median': scenario_table.loc['median', 'median'] / 100,
        'climate': 'Median'
    }).reset_index()

    # Fall back to the labels carried by the data object.
    title = title or data.plot_title
    xlabel = xlabel or '{} ({})'.format(data.plot_label_x, data.plot_unit_x)
    ylabel = ylabel or data.plot_unit_y

    x_channel = alt.X('x:Q',
                      title=xlabel,
                      scale=alt.Scale(domain=[0, band['x'].max()]),
                      axis=alt.Axis(values=data.x))

    base = alt.Chart(band)

    # NOTE(review): this selection is never attached to a layer. It is still
    # created so that this function behaves exactly as before -- confirm
    # before removing it.
    nearest = alt.selection(type='single',
                            nearest=True,
                            on='mouseover',
                            empty='none')

    y_channel = alt.Y('median:Q', title=ylabel, axis=alt.Axis(format='%'))

    line_color = 'orange'
    legend_color = alt.Color(
        'climate',
        title='Climate Model',
        scale=alt.Scale(domain=['Median'], range=[line_color]))

    area = base.mark_area(opacity=0.3, color=line_color).encode(
        x=x_channel,
        y=alt.Y('lower:Q'),
        y2=alt.Y2('upper:Q'),
    )

    lines = base.mark_line(color=line_color).encode(x=x_channel, y=y_channel)

    hover_fields = [
        alt.Tooltip('x:Q', title=xlabel),
        alt.Tooltip('median:Q', title=ylabel, format='.1%')
    ]
    points = base.mark_point(size=60).encode(
        x=x_channel,
        y=y_channel,
        color=legend_color,
        tooltip=hover_fields,
    )

    layered = points + lines + area
    chart = layered.properties(
        title=title,
        width=800,
        autosize=alt.AutoSizeParams(contains="padding", type="fit-x"),
    )

    return configure_chart(chart).interactive()

Ejemplo n.º 6

0

Mostrar archivo

Archivo: legacyfigures.py Proyecto: ISI-MIP/isipedia-library

def _lineplot_altair_time_advanced(data,
                                   x=None,
                                   scenario=None,
                                   climate=None,
                                   impact=None,
                                   shading=False,
                                   title='',
                                   xlabel='',
                                   ylabel=''):
    """Build the interactive per-model time-series plot.

    Every line returned by ``data.filter_lines()`` is stacked into one table.
    The ``"<start>-<end>"`` period labels in ``x`` are converted to a single
    year (end year minus 10), and rows at or beyond 2100 are dropped.  The
    historical rows at 1990 are duplicated under the ``rcp60`` and ``rcp26``
    scenarios so that the future lines connect to the historical one.  The
    chart layers a min/max band per scenario, the individual lines, points,
    hover labels, and a vertical rule at 2005 with "Historical Period" and
    "Future Projections" captions.

    Parameters
    ----------
    data
        Object providing ``filter_lines()``, ``plot_title`` and
        ``plot_unit_y``.
    title : str
        Chart title; ``data.plot_title`` is used when empty.
    xlabel : str
        X-axis title; ``'Time'`` is used when empty.
    x, scenario, climate, impact, shading, ylabel
        Accepted but not used by this function body.

    Returns
    -------
    The result of ``configure_chart(chart).interactive()``.
    """
    import pandas as pd
    import altair as alt

    df = pd.concat([pd.DataFrame(line) for line in data.filter_lines()])

    # Divide by 100 so we can percent-format in the plot
    df["y"] = df["y"] / 100

    # Keep the original period label, then turn it into a numeric year.
    df["x_range"] = df["x"]
    df["x"] = df["x"].apply(lambda period: int(period.split("-")[1]) - 10)
    df = df[df["x"] < 2100]

    # Fill in gap by duplicating historical values to future scenarios.
    # DataFrame.append no longer exists in pandas 2.x and ``.at`` is a scalar
    # accessor, so the copies are made with ``assign`` and stacked with one
    # ``pd.concat`` (same rows, same order as the two successive appends).
    historical_1990 = df[(df["scenario"] == "historical") & (df["x"] == 1990)]
    df = pd.concat([
        df,
        historical_1990.assign(scenario="rcp60"),
        historical_1990.assign(scenario="rcp26"),
    ])

    # Label shown when hovering over a point.
    df["model"] = df.climate + " / " + df.impact

    df = df.replace(scenario_map)

    axisX = alt.X('x:Q',
                  title=xlabel or 'Time',
                  scale=alt.Scale(domain=[1900, 2100]),
                  axis=alt.Axis(format="i",
                                values=np.arange(1900, 2100 + 1, 20).tolist()))

    # Legend-bound selection: toggles which scenarios are visible.
    selection_climate = alt.selection_multi(fields=['scenario'], bind='legend')

    # Hover selection: picks the point nearest to the mouse.
    nearest = alt.selection(type='single',
                            nearest=True,
                            on='mouseover',
                            fields=["x", 'y'],
                            empty='none')

    base = alt.Chart(df[(df.climate != "median")])

    color = alt.Color(
        'scenario',
        title='Climate Scenario',
        scale=alt.Scale(
            domain=list(scenario_map.values()),
            range=['#4674b9', '#80b946', 'orange']))

    # Vertical rule and captions separating the two periods.
    rule_data = pd.DataFrame({'line': [2005]})
    rule_text_data = pd.DataFrame([
        {
            "year": 1910,
            "text": "Historical Period"
        },
        {
            "year": 2015,
            "text": "Future Projections"
        },
    ])

    rule = alt.Chart(rule_data).mark_rule().encode(x='line:Q')

    rule_text = alt.Chart(rule_text_data).mark_text(
        align="left", dy=-130).encode(x="year", text="text")

    # Band between the smallest and largest y for each scenario.
    area = base.mark_area(opacity=0.3).encode(
        x=axisX,
        color=color,
        y=alt.Y(field="y",
                type="quantitative",
                axis=alt.Axis(format='%'),
                aggregate="min"),
        y2=alt.Y2(field="y", aggregate="max"),
        opacity=alt.condition(selection_climate, alt.value(0.3), alt.value(0)),
    ).add_selection(selection_climate)

    lines = base.mark_line().encode(
        x=axisX,
        y=alt.Y("y", axis=alt.Axis(format='%')),
        detail=["climate", "impact", "scenario"],
        color=color,
        opacity=alt.condition(selection_climate, alt.value(0.3), alt.value(0)),
        size=alt.condition("datum.impact == 'median'", alt.value(5),
                           alt.value(1)))

    points = base.mark_point().encode(
        x=axisX,
        y=alt.Y("y", axis=alt.Axis(title=data.plot_unit_y, format='%')),
        detail=["climate", "impact", "scenario"],
        color=color,
        opacity=alt.condition(selection_climate, alt.value(0.3), alt.value(0)),
        size=alt.value(12),
    )

    # Hover labels: model name above the point, percent value below it.
    text_model = points.mark_text(align='left', dx=-5, dy=-6).encode(
        text=alt.condition(nearest, "model", alt.value(' ')),
        opacity=alt.condition(selection_climate, alt.value(1), alt.value(0)),
        color=alt.value("black")).add_selection(nearest)

    text_pct = points.mark_text(align='left', dx=-5, dy=6).encode(
        text=alt.condition(nearest, "y", alt.value(' '), format=".2p"),
        opacity=alt.condition(selection_climate, alt.value(1), alt.value(0)),
        color=alt.value("black"))

    chart = (area + rule + rule_text + lines + points + text_model +
             text_pct).properties(
                 title=title or data.plot_title,
                 width=800,
                 autosize=alt.AutoSizeParams(contains="padding", type="fit-x"),
             )

    return configure_chart(chart).interactive()

Ejemplo n.º 7

0

Mostrar archivo

def gen_sample_plot(metadata):
    """Build the Vega-Lite spec for the sample plot with Altair.

    Parameters
    ----------

    metadata: pd.DataFrame
        Sample metadata: one row per sample (sample IDs in the index) and
        one column per metadata field.

        Expected to have already been matched with the BIOM table, had empty
        samples removed, etc.

    Returns
    -------

    sample_chart_json: dict
        The chart as a dict, with an extra "qurro_sample_metadata_fields"
        dataset listing the metadata field names in case-insensitive
        alphabetical order.
    """
    # Work on a copy; the caller's DataFrame is not modified.
    samples = metadata.copy()

    # The first metadata field drives both the x-axis and the color.
    initial_field = samples.columns[0]

    # No default log-ratio is chosen, so every sample starts with a balance
    # of None. Vega* filters those out, which gives an empty scatterplot by
    # default.
    samples["qurro_balance"] = None

    # Turn the sample IDs into a leading "Sample ID" column. (Thanks to
    # check_column_names(), no metadata field should already have that name.)
    samples = samples.rename_axis("Sample ID", axis="index").reset_index()

    # Describe the encoding channels, then assemble the chart.
    x_channel = alt.X(
        initial_field,
        type="nominal",
        axis=alt.Axis(labelAngle=-45),
    )
    y_channel = alt.Y(
        "qurro_balance:Q",
        title="Current Log-Ratio",
        type="quantitative",
    )
    color_channel = alt.Color(initial_field, type="nominal")

    chart = alt.Chart(
        samples,
        title="Samples",
        background="#FFFFFF",
        autosize=alt.AutoSizeParams(resize=True),
    )
    chart = chart.mark_circle().encode(
        x_channel,
        y_channel,
        color=color_channel,
        tooltip=["Sample ID:N", "qurro_balance:Q"],
    )
    chart = chart.configure_range(
        ramp=alt.SchemeConfig(scheme="blues"),
        category=alt.SchemeConfig(scheme="tableau10"),
    )
    chart = chart.configure_axis(labelBound=True).interactive()

    # Swap the plain "circle" mark for an explicit mark definition, which is
    # easier to extend with attributes later on. (A workaround: alt.MarkDef
    # could not be made to work in the chart definition itself.)
    spec = chart.to_dict()
    spec["mark"] = {"type": "circle"}

    fields_key = "qurro_sample_metadata_fields"
    check_json_dataset_names(spec, fields_key)

    # Field names in case-insensitive alphabetical order, used to populate
    # the x-axis / color selectors in Qurro's sample plot controls. Sorting
    # with key=str.lower avoids "Sample ID" sorting before "age" purely
    # because of casing. Based on:
    # https://www.afternerd.com/blog/python-sort-list/#sort-strings-case-insensitive
    #
    # qurro_balance is dropped because it is not meant to be offered to the
    # user; it is already used on the y-axis automatically.
    field_names = sorted(samples.columns, key=str.lower)
    field_names.remove("qurro_balance")
    spec["datasets"][fields_key] = field_names
    return spec

Ejemplo n.º 8

0

Mostrar archivo

def gen_rank_plot(V, ranking_ids, feature_metadata_cols):
    """Build the Vega-Lite spec for the rank plot with Altair.

    Parameters
    ----------

    V: pd.DataFrame
        Feature ranks (and feature metadata, if any): one row per feature
        and one column per ranking or feature metadata field.

        Expected to have already been matched with the BIOM table, filtered
        (if -x passed), had empty features removed, etc.

    ranking_ids: pd.Index
        Names of the columns of V that hold feature rankings.

    feature_metadata_cols: pd.Index or list
        Names of the columns of V that hold feature metadata (may be an
        empty list when no feature metadata was provided).

    Returns
    -------

    rank_chart_json: dict
        The chart as a dict, with qurro_rank_ordering and
        qurro_feature_metadata_ordering datasets added to record which
        columns are rankings and which are feature metadata.
    """
    features = V.copy()

    # Up to here the rank values may still be generic "objects" as far as
    # pandas is concerned; sorting those would be lexicographic. Make every
    # ranking column numeric.
    for ranking in ranking_ids:
        features[ranking] = pd.to_numeric(features[ranking])

    # The first ranking is the one shown when the plot is first drawn.
    initial_ranking = ranking_ids[0]

    # Every feature starts out classified as "None"; the value changes when
    # a feature is selected for the numerator, denominator or both parts of
    # the current log-ratio.
    features["qurro_classification"] = "None"

    # Expose the index as a "Feature ID" column.
    features = features.rename_axis("Feature ID", axis="index").reset_index()

    # type="ordinal" is needed here to make the bars adjacent; see
    # https://stackoverflow.com/a/55544817/10730311.
    x_channel = alt.X(
        "qurro_x",
        title="Feature Rankings",
        type="ordinal",
        scale=alt.Scale(paddingOuter=1, paddingInner=0, rangeStep=1),
        axis=alt.Axis(ticks=False, labelAngle=0),
    )
    y_channel = alt.Y(initial_ranking, type="quantitative")
    color_channel = alt.Color(
        "qurro_classification",
        title="Log-Ratio Classification",
        scale=alt.Scale(
            domain=["None", "Numerator", "Denominator", "Both"],
            range=["#e0e0e0", "#f00", "#00f", "#949"],
        ),
    )

    hover_fields = [
        alt.Tooltip(
            field="qurro_x",
            title="Current Ranking",
            type="quantitative",
        ),
        alt.Tooltip(
            field="qurro_classification",
            title="Log-Ratio Classification",
            type="nominal",
        ),
        "Feature ID",
    ]
    hover_fields.extend(feature_metadata_cols)
    hover_fields.extend(ranking_ids)

    chart = alt.Chart(
        features,
        title="Features",
        background="#FFFFFF",
        autosize=alt.AutoSizeParams(resize=True),
    ).mark_bar()
    # A plain dict is used for the window definition because "as" is a
    # Python keyword and cannot be passed as an argument name.
    chart = chart.transform_window(
        sort=[alt.SortField(field=initial_ranking, order="ascending")],
        window=[{
            "op": "row_number",
            "as": "qurro_x"
        }],
    )
    chart = chart.encode(
        x=x_channel,
        y=y_channel,
        color=color_channel,
        tooltip=hover_fields,
    )
    # A light grid color keeps "None"-classified bars distinguishable from
    # the grid lines.
    chart = chart.configure_axis(gridColor="#f2f2f2", labelBound=True)
    chart = chart.interactive()

    spec = chart.to_dict()
    rank_key = "qurro_rank_ordering"
    metadata_key = "qurro_feature_metadata_ordering"
    check_json_dataset_names(spec, rank_key, metadata_key)

    # The ordering comes from ranking_ids rather than the table's columns:
    # by now the table also holds "Feature ID", "qurro_classification" and
    # any feature metadata columns.
    spec["datasets"][rank_key] = list(ranking_ids)
    spec["datasets"][metadata_key] = list(feature_metadata_cols)
    return spec

Ejemplo n.º 9

0

Mostrar archivo

def missingDates(data, freq="D", format='%Y-%m-%d', returnType="viz"):
    """
    Check for Missing Dates

    Build the range of dates expected between the earliest and latest date in
    ``data`` at the given frequency, and report which of them are absent,
    either as a table or as a chart.

    Parameters
    ----------
    data : pd.Series
        A Pandas series that contains dates. Dates are parsed using
        pd.to_datetime().
    freq : str, default 'D'
        The expected frequency of the dates. Valid options are:
            B: business day frequency
            C: custom business day frequency
            D: calendar day frequency
            W: weekly frequency
            M: month end frequency
            SM: semi-month end frequency (15th and end of month)
            BM: business month end frequency
            CBM: custom business month end frequency
            MS: month start frequency
            SMS: semi-month start frequency (1st and 15th)
            BMS: business month start frequency
            CBMS: custom business month start frequency
            Q: quarter end frequency
            BQ: business quarter end frequency
            QS: quarter start frequency
            BQS: business quarter start frequency
            A, Y: year end frequency
            BA, BY: business year end frequency
            AS, YS: year start frequency
            BAS, BYS: business year start frequency
            BH: business hour frequency
            H: hourly frequency
            T, min: minutely frequency
            S: secondly frequency
            L, ms: milliseconds
            U, us: microseconds
            N: nanoseconds
    format : str, default '%Y-%m-%d'
        Kept for backward compatibility; not used by this implementation.
    returnType : str, default 'viz'
        One of:
            missing: Return the missing dates
            all: Return missing and present dates
            viz: Return a visualisation

    Returns
    -------
    pd.DataFrame or chart
        For 'missing' and 'all', a table with a ``date`` column
        ('%Y-%m-%d' strings) and a boolean ``exists`` column. For 'viz', an
        interactive Altair bar chart of the same table.

    Raises
    ------
    TypeError
        If ``data`` is not a pd.Series.
    ValueError
        If ``returnType`` is not one of the values above, or if ``data``
        contains no valid date.
    """
    # Validate up front with real exceptions: an assert disappears under
    # ``python -O``, and an unknown returnType used to surface as an
    # UnboundLocalError at the very end of the function.
    if not isinstance(data, pd.Series):
        raise TypeError("data must be a pandas Series, got " +
                        type(data).__name__)
    if returnType not in ("viz", "missing", "all"):
        raise ValueError(
            "returnType must be 'viz', 'missing' or 'all', got " +
            repr(returnType))

    datesToCheck = pd.to_datetime(list(data))

    firstDate = datesToCheck.min()
    lastDate = datesToCheck.max()
    if pd.isna(firstDate) or pd.isna(lastDate):
        raise ValueError("data must contain at least one valid date")

    # The bounds are reduced to calendar days before building the range.
    computedRange = pd.date_range(firstDate.strftime("%Y-%m-%d"),
                                  lastDate.strftime("%Y-%m-%d"),
                                  freq=freq)

    allChecks = pd.DataFrame({
        "date": computedRange,
        "exists": [date in datesToCheck for date in computedRange],
    })
    # Keep the column boolean even when the computed range is empty.
    allChecks["exists"] = allChecks["exists"].astype(bool)

    total = len(allChecks)
    missing = int((~allChecks["exists"]).sum())
    # An empty range (possible with coarse frequencies) has nothing missing.
    percentMissing = int((missing / total) * 100) if total else 0

    allChecks["date"] = allChecks["date"].dt.strftime("%Y-%m-%d")

    if returnType == "missing":
        return allChecks[~allChecks["exists"]]
    if returnType == "all":
        return allChecks

    # returnType == "viz"
    calculatedTitle = (f"Total Dates: {total}, Missing Dates: {missing}, "
                       f"({percentMissing}%)")

    scale = alt.Scale(domain=['true', 'false'], range=['#B8E986', '#F15545'])

    # Size the bars so that the whole range fits in the 800px wide chart.
    xRange = len(computedRange)
    barSize = 800 / max(xRange, 1)
    padding = (barSize / 2) + 1

    chart = alt.Chart(allChecks,
                      title=alt.TitleParams(calculatedTitle,
                                            anchor="start",
                                            offset=20,
                                            orient="top"),
                      width=800,
                      height=400,
                      autosize=alt.AutoSizeParams(contains="content",
                                                  resize=True,
                                                  type="fit"))
    chart = chart.mark_bar(size=barSize).encode(
        x=alt.X("date",
                axis=alt.Axis(tickCount=xRange),
                title="Date",
                type="temporal",
                scale=alt.Scale(padding=padding)),
        y="count()",
        color=alt.Color("exists", scale=scale),
        tooltip=[
            alt.Tooltip("date", format="%Y-%m-%d", type="temporal"),
            "exists", "count()"
        ])
    return chart.interactive(bind_y=False)