Example #1
    def test_cached_name(self, mock_data_fetcher):
        dcid1 = 'geoId/06'
        dcid2 = 'geoId/07'
        dcid3 = 'geoId/08'
        mock_response = {
            dcid1: {
                'out': [{
                    'value': 'California',
                    'provenance': 'prov1'
                }]
            },
            dcid2: {
                'out': []
            },
            dcid3: {
                'out': [{
                    'value': 'Colorado',
                    'provenance': 'prov2'
                }]
            }
        }
        # The side_effect ignores the request arguments and always returns the
        # canned response above.
        mock_data_fetcher.side_effect = (
            lambda url, req, compress, post: mock_response)

        result = shared_api.cached_name('^'.join([dcid1, dcid2, dcid3]))
        assert result == {dcid1: 'California', dcid2: '', dcid3: 'Colorado'}
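
For context, the mocked response and the assertion above imply roughly the following shape for cached_name. This is a minimal sketch, assuming the same fetch_data helper used by cached_i18n_name in Example #5; it is not the actual implementation, which is additionally memoized on the '^'-joined string so the cache key stays hashable and stable.

def cached_name_sketch(dcids):
    # Hypothetical reconstruction: split the '^'-joined cache key back into
    # individual dcids and fetch their 'name' property values.
    dcids = dcids.split('^')
    response = fetch_data('/node/property-values', {
        'dcids': dcids,
        'property': 'name',
        'direction': 'out'
    }, compress=False, post=True)
    result = {}
    for dcid in dcids:
        values = response[dcid].get('out')
        # Fall back to an empty string when a node has no name, as the
        # assertion for dcid2 ('geoId/07') expects.
        result[dcid] = values[0]['value'] if values else ''
    return result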
Example #2
def node(dcid):
    if os.environ.get('FLASK_ENV') == 'production':
        flask.abort(404)
    node_name = shared_api.cached_name(dcid).get(dcid)
    if not node_name:
        node_name = dcid
    return render_template('/disease/node.html',
                           dcid=dcid,
                           node_name=node_name)
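
Routes following this pattern can be smoke-tested with Flask's test client. Everything below besides the 404-in-production behavior is an assumption for illustration: the snippet does not show the client fixture or the URL rule under which the view is registered.

def test_disease_node_404_in_production(client, monkeypatch):
    # Hypothetical pytest: `client` is an assumed Flask test-client fixture
    # and '/disease/geoId/06' an assumed URL; only the 404 check is grounded
    # in the route above.
    monkeypatch.setenv('FLASK_ENV', 'production')
    response = client.get('/disease/geoId/06')
    assert response.status_code == 404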
Example #3
def get_name(dcids):
    """Returns display names for set of dcids.

    Args:
        dcids: A list of place dcids.

    Returns:
        A dictionary of display place names, keyed by dcid.
    """
    return cached_name('^'.join(sorted(dcids)))
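
Sorting before joining matters because cached_name memoizes on the joined string: two calls with the same dcids in different orders produce the same cache key and hit the same entry. A small illustration (the names match Example #1's mocked data):

names = get_name(['geoId/07', 'geoId/06'])
same = get_name(['geoId/06', 'geoId/07'])
# Both calls build the key 'geoId/06^geoId/07', so the second one is served
# from cache.
assert names == same  # e.g. {'geoId/06': 'California', 'geoId/07': ''}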
Example #4
def browser_node(dcid):
    if os.environ.get('FLASK_ENV') == 'svobs' or os.environ.get(
            'FLASK_ENV') == 'development-svobs':
        node_name = shared_api.cached_name(dcid).get(dcid)
        if not node_name:
            node_name = dcid
        return render_template('/browser/node.html',
                               dcid=dcid,
                               node_name=node_name)
    return render_template('/browser/kg_entity.html', dcid=dcid)
Example #5
def cached_i18n_name(dcids, locale, should_resolve_all):
    """Returns localization names for set of dcids.

    Args:
        dcids: ^-separated string of dcids. It must be a single string for the cache.
        locale: the desired localization language code.
        should_resolve_all: True if every dcid should be returned with a
                            name, False if only i18n names should be filled.

    Returns:
        A dictionary of place names, keyed by dcid (potentially sparse if should_resolve_all=False).
    """
    if not dcids:
        return {}
    dcids = dcids.split('^')
    response = fetch_data('/node/property-values', {
        'dcids': dcids,
        'property': 'nameWithLanguage',
        'direction': 'out'
    }, compress=False, post=True)
    result = {}
    dcids_default_name = []
    locales = i18n.locale_choices(locale)
    for dcid in dcids:
        values = response[dcid].get('out')
        # If there is no nameWithLanguage for this dcid, fall back to name.
        if not values:
            dcids_default_name.append(dcid)
            continue
        result[dcid] = ''
        # Walk locales from most to least preferred; the loop variable is
        # named `loc` to avoid shadowing the `locale` parameter.
        for loc in locales:
            for entry in values:
                if has_locale_name(entry, loc):
                    result[dcid] = extract_locale_name(entry, loc)
                    break
            if result[dcid]:
                break
    if dcids_default_name:
        if should_resolve_all:
            default_names = cached_name('^'.join(sorted(dcids_default_name)))
        else:
            default_names = {}
        for dcid in dcids_default_name:
            result[dcid] = default_names.get(dcid, '')
    return result
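
The helpers has_locale_name and extract_locale_name are not shown here. A plausible sketch, assuming nameWithLanguage values are encoded as 'Name@lang' (for example 'Kalifornien@de'); the real implementations may differ:

def has_locale_name(entry, locale):
    # entry is one item from the 'out' list, e.g. {'value': 'Kalifornien@de'}.
    return entry.get('value', '').endswith('@' + locale)

def extract_locale_name(entry, locale):
    if has_locale_name(entry, locale):
        # Strip the trailing '@<locale>' suffix to recover the bare name.
        return entry['value'].rsplit('@', 1)[0]
    return ''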
Example #6
def browser_node(dcid):
    node_name = shared_api.cached_name(dcid).get(dcid)
    if not node_name:
        node_name = dcid
    return render_template('/browser/node.html', dcid=dcid, node_name=node_name)
Example #7
def get_series_csv_rows(series_response,
                        sv_list,
                        facet_map,
                        min_date,
                        max_date,
                        row_limit=None):
    """
    Gets the csv rows for a set of statistical variable series for a certain
    date range.

    Args:
        series_response: the response from a dc.series_within call
        sv_list: list of variables in the order that they should appear from
            left to right in each csv row.
        facet_map: dict mapping each variable dcid to the facet id to use for
            that variable; an empty string means use the best available facet.
        min_date (optional): the earliest date as a string to get data for. If
            not set get all dates up to max_date (if max_date is set).
        max_date (optional): the latest date as a string to get data for. If not
            set, get all dates starting at min_date (if min_date is set).
        row_limit (optional): number of csv rows to return

    Returns:
        An array where each item in the array is a csv row. These csv rows are
        represented as an array where each item is the value of a cell in the
        row.
    """
    facets = series_response.get("facets", {})
    obs_by_sv = series_response.get("observationsByVariable", [])
    # dict of place dcid to dict of sv dcid to chosen series.
    data_by_place = {}
    for sv_data in obs_by_sv:
        sv = sv_data.get("variable")
        target_facet = facet_map.get(sv, "")
        for place_data in sv_data.get("observationsByEntity", []):
            place = place_data.get("entity")
            series_by_facet = place_data.get("seriesByFacet", [])
            if place not in data_by_place:
                data_by_place[place] = {}
            for series in series_by_facet:
                # if no facet selected for this variable, choose the first
                # series in the list because seriesByFacet is sorted by best
                # facet first
                if target_facet == "":
                    data_by_place[place][sv] = series
                    break
                if str(series.get("facet")) == target_facet:
                    data_by_place[place][sv] = series
                    break
    place_list = sorted(list(data_by_place.keys()))
    place_names = cached_name("^".join(place_list))
    result = []
    for place, place_name in place_names.items():
        # dict of sv to the sorted list of data points for that sv that fall
        # within the date range
        sv_data_points = {}
        # dict of sv to its source
        sv_source = {}
        # dict of sv to the idx of the next date for that sv to add to the result
        sv_curr_index = {}
        # whether or not there is still data to add to the result
        have_data = False
        for sv in sv_list:
            sv_series = data_by_place.get(place, {}).get(sv, {})
            want_data_points = []
            # Go through the series and keep data points that are within the
            # date range
            for data_point in sv_series.get("series", []):
                date = data_point.get("date")
                is_greater_than_min = date_greater_equal_min(date, min_date)
                is_less_than_max = date_lesser_equal_max(date, max_date)
                if is_greater_than_min and is_less_than_max:
                    want_data_points.append(data_point)
            want_data_points.sort(key=lambda x: x["date"])
            sv_data_points[sv] = want_data_points
            facetId = sv_series.get("facet", "")
            sv_source[sv] = facets.get(str(facetId),
                                       {}).get("provenanceUrl", "")
            sv_curr_index[sv] = 0
            have_data = have_data or len(want_data_points) > 0
        while have_data:
            if row_limit and len(result) >= row_limit:
                break
            curr_date = ""
            # look through all the next dates to add data for and choose the
            # earliest date and the one with highest granularity
            # eg. between 2015 and 2015-01 we want 2015-01
            #     between 2015 and 2016 we want 2015
            for sv, idx in sv_curr_index.items():
                if idx >= len(sv_data_points[sv]):
                    continue
                curr_sv_date = sv_data_points[sv][idx]["date"]
                if not curr_date:
                    curr_date = curr_sv_date
                elif curr_sv_date < curr_date or curr_sv_date.startswith(
                        curr_date):
                    curr_date = curr_sv_date
            have_data = False
            place_date_row = [place, place_name]
            for sv, idx in sv_curr_index.items():
                # if an sv has no more data left, just append empty cells
                if idx >= len(sv_data_points[sv]):
                    place_date_row.extend(["", "", ""])
                    continue
                curr_sv_date = sv_data_points[sv][idx]["date"]
                # Add data for an sv if the current date to add for that sv is
                # equal to or encompassing the chosen date. Eg. if the chosen
                # date is 2015-01-02, then we can add data from 2015, 2015-01 or
                # 2015-01-02.
                if curr_date.startswith(curr_sv_date):
                    value = sv_data_points[sv][idx]["value"]
                    place_date_row.extend(
                        [curr_sv_date, value,
                         sv_source.get(sv, "")])
                    sv_curr_index[sv] += 1
                else:
                    place_date_row.extend(["", "", ""])
                have_data = have_data or sv_curr_index[sv] < len(
                    sv_data_points[sv])
            result.append(place_date_row)
    return result
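
Because each row is a plain list of cell values, writing the output with the standard csv module is straightforward. The header below is an assumption, derived from the leading place columns and the (date, value, source) triple appended per variable:

import csv

rows = get_series_csv_rows(series_response, sv_list, facet_map, min_date,
                           max_date)
header = ['placeDcid', 'placeName']
for sv in sv_list:
    # Each variable contributes a (date, value, source) triple per row.
    header.extend([f'{sv}_date', f'{sv}_value', f'{sv}_source'])
with open('series.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(rows)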
Example #8
def get_points_within_csv_rows(parent_place,
                               child_type,
                               sv_list,
                               facet_map,
                               date,
                               row_limit=None):
    """
    Gets the csv rows for a set of statistical variables data for child places
    of a certain place type contained in a parent place.

    Args:
        parent_place: the parent place of the places to get data for
        child_type: the type of places to get data for
        sv_list: list of variables in the order that they should appear from
            left to right in each csv row.
        facet_map: dict mapping each variable dcid to the facet id to use for
            that variable; an empty string means use the best available facet.
        date: the date to get the data for
        row_limit (optional): number of csv rows to return

    Returns:
        An array where each item in the array is a csv row. These csv rows are
        represented as an array where each item is the value of a cell in the
        row.
    """
    points_response_all = dc.points_within(parent_place, child_type, sv_list,
                                           date, True)
    points_response_best = {}
    # Set of stat vars where we need to make a separate call to
    # dc.points_within to get the data points of the latest date and best facet
    sv_latest_best_point = set()
    if date == "":
        for sv in sv_list:
            if facet_map.get(sv, "") == "":
                sv_latest_best_point.add(sv)
        if len(sv_latest_best_point) > 0:
            points_response_best = dc.points_within(parent_place, child_type,
                                                    list(sv_latest_best_point),
                                                    date, False)
    # dict of place dcid to dict of sv dcid to chosen data point.
    data_by_place = {}
    # go through the data in points_response_best and add to data_by_place
    for sv_data in points_response_best.get("observationsByVariable", []):
        sv = sv_data.get("variable")
        for place_data in sv_data.get("observationsByEntity", []):
            place = place_data.get("entity")
            if place not in data_by_place:
                data_by_place[place] = {}
            # points_response_best should have just a single best (latest date
            # and best facet) data point for each stat var and place.
            points = place_data.get("pointsByFacet", [])
            if points:
                data_by_place[place][sv] = points[0]
    # go through the data in points_response_all and add to data_by_place
    for sv_data in points_response_all.get("observationsByVariable", []):
        sv = sv_data.get("variable")
        # points_response_all has data for all stat vars, but we want to skip
        # the stat vars that are included in points_response_best
        if sv in sv_latest_best_point:
            continue
        target_facet = facet_map.get(sv, "")
        for place_data in sv_data.get("observationsByEntity", []):
            place = place_data.get("entity")
            if place not in data_by_place:
                data_by_place[place] = {}
            points_by_facet = place_data.get("pointsByFacet", [])
            for point in points_by_facet:
                # if no facet selected for this variable, choose the first
                # point in the list because pointsByFacet is sorted by best
                # facet first
                if target_facet == "":
                    data_by_place[place][sv] = point
                    break
                if str(point.get("facet")) == target_facet:
                    data_by_place[place][sv] = point
                    break
    facet_info = points_response_all.get("facets", {})
    place_list = sorted(list(data_by_place.keys()))
    place_names = cached_name("^".join(place_list))
    result = []
    for place, place_name in place_names.items():
        if row_limit and len(result) >= row_limit:
            break
        place_row = [place, place_name]
        for sv in sv_list:
            data = data_by_place.get(place, {}).get(sv, {})
            # Use a distinct name so the `date` parameter is not shadowed.
            obs_date = data.get("date", "")
            value = data.get("value", "")
            facetId = data.get("facet", "")
            facet = facet_info.get(str(facetId), {}).get("provenanceUrl", "")
            place_row.extend([obs_date, value, facet])
        result.append(place_row)
    return result
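
A hedged usage sketch: with date set to the empty string and no facet pinned for a variable, the function makes the separate best-facet call shown above. The dcids and variable below are illustrative:

# Latest available data for all counties in California, capped at 100 rows;
# an empty facet_map entry means "use the best available facet".
rows = get_points_within_csv_rows(parent_place='geoId/06',
                                  child_type='County',
                                  sv_list=['Count_Person'],
                                  facet_map={'Count_Person': ''},
                                  date='',
                                  row_limit=100)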