Esempio n. 1
0
def check_shapes(pfeed, *, as_df=False, include_warnings=False):
    """
    Check ``pfeed.shapes`` and report the problems found.
    Analog of :func:`check_frequencies` for ``pfeed.shapes``.

    Parameters
    ----------
    pfeed : ProtoFeed
    as_df : boolean
        Passed to :func:`gk.format_problems` to select the output format
    include_warnings : boolean
        If ``True``, then also run :func:`check_for_invalid_columns`
        on the table

    Returns
    -------
    list or DataFrame
        The problems found, formatted by :func:`gk.format_problems`.
        A missing shapes table (``pfeed.shapes is None``) is not
        reported as a problem, so the result is empty in that case.
    """
    table = "shapes"
    problems = []

    # Preliminary checks
    if pfeed.shapes is None:
        # Format the empty result as well, so that the return type
        # honours ``as_df`` on every exit path of this function.
        return gk.format_problems(problems, as_df=as_df)

    f = pfeed.shapes.copy()
    problems = check_for_required_columns(problems, table, f)
    if problems:
        # Skip the column checks when required columns are missing
        return gk.format_problems(problems, as_df=as_df)

    if include_warnings:
        problems = check_for_invalid_columns(problems, table, f)

    # Check shape_id
    problems = gk.check_column(problems, table, f, "shape_id", gk.valid_str)

    # Check geometry: every value must be a non-empty LineString
    def valid_geometry(x):
        return isinstance(x, sg.LineString) and not x.is_empty

    problems = gk.check_column(problems, table, f, "geometry", valid_geometry)

    return gk.format_problems(problems, as_df=as_df)
Esempio n. 2
0
def check_meta(pfeed, *, as_df=False, include_warnings=False):
    """
    Check ``pfeed.meta`` and report the problems found.
    Analog of :func:`check_frequencies` for ``pfeed.meta``.

    A missing table or missing required columns are reported on their
    own, without running the remaining checks.
    The result is formatted by :func:`gk.format_problems` according to
    ``as_df``.
    If ``include_warnings``, then also run
    :func:`check_for_invalid_columns` on the table.
    """
    table = "meta"
    problems = []

    # The table itself must exist
    if pfeed.meta is None:
        problems.append(["error", "Missing table", table, []])
        return gk.format_problems(problems, as_df=as_df)

    # The required columns must exist
    f = pfeed.meta.copy()
    problems = check_for_required_columns(problems, table, f)
    if problems:
        return gk.format_problems(problems, as_df=as_df)

    if include_warnings:
        problems = check_for_invalid_columns(problems, table, f)

    # Every row after the first one is reported as surplus
    n_rows = f.shape[0]
    if n_rows > 1:
        surplus_rows = list(range(1, n_rows))
        problems.append(
            ["error", "Meta must have only one row", table, surplus_rows]
        )

    # Column checks, applied in this order
    column_checks = [
        ("agency_name", gk.valid_str),
        ("agency_url", gk.valid_url),
        ("agency_timezone", gk.valid_timezone),
        ("start_date", gk.valid_date),
        ("end_date", gk.valid_date),
        ("default_route_speed", valid_speed),
    ]
    for column, checker in column_checks:
        problems = gk.check_column(problems, table, f, column, checker)

    return gk.format_problems(problems, as_df=as_df)
Esempio n. 3
0
def check_service_windows(pfeed, *, as_df=False, include_warnings=False):
    """
    Check ``pfeed.service_windows`` and report the problems found.
    Analog of :func:`check_frequencies` for ``pfeed.service_windows``.

    A missing table or missing required columns are reported on their
    own, without running the remaining checks.
    The result is formatted by :func:`gk.format_problems` according to
    ``as_df``.
    If ``include_warnings``, then also run
    :func:`check_for_invalid_columns` on the table.
    """
    table = "service_windows"
    problems = []

    # The table itself must exist
    if pfeed.service_windows is None:
        problems.append(["error", "Missing table", table, []])
        return gk.format_problems(problems, as_df=as_df)

    # The required columns must exist
    f = pfeed.service_windows.copy()
    problems = check_for_required_columns(problems, table, f)
    if problems:
        return gk.format_problems(problems, as_df=as_df)

    if include_warnings:
        problems = check_for_invalid_columns(problems, table, f)

    # Check service window ID
    problems = gk.check_column_id(problems, table, f, "service_window_id")

    # Check start_time, then end_time
    for time_column in ("start_time", "end_time"):
        problems = gk.check_column(
            problems, table, f, time_column, gk.valid_time
        )

    # Check weekday columns: only the values 0 and 1 are accepted
    def is_binary_flag(x):
        return x in range(2)

    weekdays = (
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
    )
    for weekday in weekdays:
        problems = gk.check_column(problems, table, f, weekday, is_binary_flag)

    return gk.format_problems(problems, as_df=as_df)
Esempio n. 4
0
def validate(pfeed, *, as_df=True, include_warnings=True):
    """
    Check whether the given pfeed satisfies the ProtoFeed spec.

    Parameters
    ----------
    pfeed : ProtoFeed
    as_df : boolean
        If ``True``, then return the resulting report as a DataFrame;
        otherwise return the result as a list
    include_warnings : boolean
        If ``True``, then include problems of types ``'error'`` and
        ``'warning'``; otherwise, only return problems of type
        ``'error'``

    Returns
    -------
    list or DataFrame
        Run the table-checking functions :func:`check_frequencies`,
        :func:`check_meta`, :func:`check_service_windows`,
        :func:`check_shapes`, and :func:`check_stops`, in that order,
        and collect their results.
        This yields a possibly empty list of items
        [problem type, message, table, rows].
        If ``as_df``, then format the error list as a DataFrame with the
        columns

        - ``'type'``: 'error' or 'warning'; 'error' means the ProtoFeed
          spec is violated; 'warning' means there is a problem but it's
          not a ProtoFeed spec violation
        - ``'message'``: description of the problem
        - ``'table'``: table in which problem occurs, e.g. 'frequencies'
        - ``'rows'``: rows of the table's DataFrame where problem occurs

        Every checker is always run; the checkers visible alongside this
        function stop early on their own table when it is missing or
        lacks required columns.

    """
    # Names of the per-table checkers, looked up in this module's
    # namespace at call time
    checker_names = (
        "check_frequencies",
        "check_meta",
        "check_service_windows",
        "check_shapes",
        "check_stops",
    )
    module_namespace = globals()

    # Each checker is called with its default ``as_df=False`` so that
    # the individual results can be concatenated
    collected = []
    for name in checker_names:
        run_check = module_namespace[name]
        collected += run_check(pfeed, include_warnings=include_warnings)

    return gk.format_problems(collected, as_df=as_df)
Esempio n. 5
0
def check_frequencies(pfeed, *, as_df=False, include_warnings=False):
    """
    Check that ``pfeed.frequencies`` follows the ProtoFeed spec.
    Return a list of problems of the form described in
    :func:`gk.check_table`;
    the list will be empty if no problems are found.

    A missing table or missing required columns are reported on their
    own, without running the remaining checks.
    The result is formatted by :func:`gk.format_problems` according to
    ``as_df``.
    If ``include_warnings``, then also run
    :func:`check_for_invalid_columns` on the table.
    """
    table = "frequencies"
    problems = []

    # The table itself must exist
    if pfeed.frequencies is None:
        problems.append(["error", "Missing table", table, []])
        return gk.format_problems(problems, as_df=as_df)

    # The required columns must exist
    f = pfeed.frequencies.copy()
    problems = check_for_required_columns(problems, table, f)
    if problems:
        return gk.format_problems(problems, as_df=as_df)

    if include_warnings:
        problems = check_for_invalid_columns(problems, table, f)

    # Check route_short_name and route_long_name
    for name_column in ("route_short_name", "route_long_name"):
        problems = gk.check_column(
            problems,
            table,
            f,
            name_column,
            gk.valid_str,
            column_required=False,
        )

    # Flag the rows in which both route names are null
    short_name_missing = f["route_short_name"].isnull()
    long_name_missing = f["route_long_name"].isnull()
    both_missing = short_name_missing & long_name_missing
    problems = gk.check_table(
        problems,
        table,
        f,
        both_missing,
        "route_short_name and route_long_name both empty",
    )

    # Check route_type: one of 0, ..., 7
    def valid_route_type(x):
        return x in range(8)

    problems = gk.check_column(
        problems, table, f, "route_type", valid_route_type
    )

    # Check service window ID against the service windows table
    problems = gk.check_column_linked_id(
        problems, table, f, "service_window_id", pfeed.service_windows
    )

    # Check direction: one of 0, 1, 2
    def valid_direction(x):
        return x in range(3)

    problems = gk.check_column(
        problems, table, f, "direction", valid_direction
    )

    # Check frequency: must be an int
    def valid_frequency(x):
        return isinstance(x, int)

    problems = gk.check_column(
        problems, table, f, "frequency", valid_frequency
    )

    # Check speed, an optional column
    problems = gk.check_column(
        problems, table, f, "speed", valid_speed, column_required=False
    )

    # Check shape ID against the shapes table
    problems = gk.check_column_linked_id(
        problems, table, f, "shape_id", pfeed.shapes
    )

    return gk.format_problems(problems, as_df=as_df)