Example #1
0
def _process_extra_facets(extra_facets):
    """Rebuilds current_app.config['EXTRA_FACET_INFO'] from extra_facets.

    A new Elasticsearch client is created from
    current_app.config['ELASTICSEARCH_URL'] and the mapping of
    current_app.config['INDEX_NAME'] is fetched once; every requested field
    is then registered through _add_facet.

    Args:
      extra_facets: List of Elasticsearch field names. A falsy value or ['']
        resets EXTRA_FACET_INFO to an empty dict.
    """
    config = current_app.config
    if not extra_facets or extra_facets == ['']:
        config['EXTRA_FACET_INFO'] = {}
        return

    es = Elasticsearch(config['ELASTICSEARCH_URL'])
    extra_facet_info = OrderedDict()
    mapping = es.indices.get_mapping(index=config['INDEX_NAME'])

    for base_name in extra_facets:
        # Everything before the last '.'; equals base_name when there is no
        # '.' in it.
        parent_name = base_name.rsplit('.', 1)[0]
        base_is_ts = elasticsearch_util.is_time_series(base_name, mapping)
        parent_is_ts = elasticsearch_util.is_time_series(parent_name, mapping)

        if base_is_ts:
            # The field itself is a time series: add one facet per time
            # series value, named "<base>.<value>".
            ts_vals = elasticsearch_util.get_time_series_vals(
                base_name, mapping)
            child_names = ['.'.join((base_name, tsv)) for tsv in ts_vals]
            for child_name in child_names:
                # Keep the 'separate_panel' flag of an already registered
                # facet; otherwise pass False.
                if child_name in extra_facet_info:
                    separate_panel = (
                        extra_facet_info[child_name]['separate_panel'])
                else:
                    separate_panel = False
                _add_facet(child_name, base_is_ts, parent_is_ts, True,
                           separate_panel, ts_vals, extra_facet_info, es,
                           mapping)
        elif parent_is_ts:
            # The field is a single entry of a time series parent.
            ts_vals = elasticsearch_util.get_time_series_vals(
                parent_name, mapping)
            # Keep the 'time_series_panel' flag of an already registered
            # facet; otherwise pass False.
            if base_name in extra_facet_info:
                time_series_panel = (
                    extra_facet_info[base_name]['time_series_panel'])
            else:
                time_series_panel = False
            _add_facet(base_name, base_is_ts, parent_is_ts,
                       time_series_panel, True, ts_vals, extra_facet_info,
                       es, mapping)
        else:
            # Neither the field nor its parent is a time series.
            _add_facet(base_name, base_is_ts, parent_is_ts, False, True, [],
                       extra_facet_info, es, mapping)

    # EXTRA_FACET_INFO maps an Elasticsearch field name to a dict holding the
    # UI facet name, the Elasticsearch field type, an optional UI facet
    # description and the Elasticsearch facet. It only covers facets added
    # from the field search dropdown on the UI.
    # It is deliberately kept apart from FACET_INFO: if both lived in one
    # map, deleting an extra facet chip would leave no way of knowing which
    # entry to remove.
    config['EXTRA_FACET_INFO'] = extra_facet_info
Example #2
0
def _results_from_fields_index(fields, mapping):
    results = []
    for field in fields['hits']['hits']:
        es_base_field_name = field["_id"]
        if "description" in field["_source"]:
            results.append(
                SearchResult(facet_name=field["_source"]["name"],
                             facet_description=field["_source"]["description"],
                             elasticsearch_field_name=es_base_field_name,
                             facet_value="",
                             is_time_series=False))
            if elasticsearch_util.is_time_series(es_base_field_name, mapping):
                time_series_vals = elasticsearch_util.get_time_series_vals(
                    es_base_field_name, mapping)
                for tsv in time_series_vals:
                    results.append(
                        SearchResult(
                            facet_name=field["_source"]["name"],
                            facet_description=field["_source"]["description"],
                            elasticsearch_field_name=es_base_field_name + '.' +
                            tsv,
                            facet_value="",
                            is_time_series=True))
        else:
            results.append(
                SearchResult(facet_name=field["_source"]["name"],
                             elasticsearch_field_name=es_base_field_name,
                             facet_value="",
                             is_time_series=False))
            if elasticsearch_util.is_time_series(es_base_field_name, mapping):
                time_series_vals = elasticsearch_util.get_time_series_vals(
                    es_base_field_name, mapping)
                for tsv in time_series_vals:
                    results.append(
                        SearchResult(
                            facet_name=field["_source"]["name"],
                            elasticsearch_field_name=es_base_field_name + '.' +
                            tsv,
                            facet_value="",
                            is_time_series=True))
    return results
Example #3
0
def _process_extra_facets(es, extra_facets):
    """Processes extra facets and sets current_app.config['EXTRA_FACET_INFO'].

    Args:
      es: Elasticsearch
      extra_facets: List of es_field_name. A falsy value or [''] resets
        EXTRA_FACET_INFO to an empty dict.

    Returns:
      A list of es_field_names that don't exist in Elasticsearch index. The
      invalid fields are not included in current_app.config['EXTRA_FACET_INFO'].
      The list is empty when there was nothing to process.
    """
    if (not extra_facets) or extra_facets == ['']:
        current_app.config['EXTRA_FACET_INFO'] = {}
        # Always hand back a list, as documented, so callers can iterate or
        # take len() of the result without a None check.
        return []

    facets = OrderedDict()
    mapping = es.indices.get_mapping(index=current_app.config['INDEX_NAME'])
    invalid_extra_facets = []

    for es_base_field_name in extra_facets:
        # Unknown fields are reported back to the caller instead of being
        # turned into facets.
        if not elasticsearch_util.field_exists(es, es_base_field_name):
            invalid_extra_facets.append(es_base_field_name)
            continue

        # Everything before the last '.'; equals the base name when the name
        # contains no '.'.
        es_parent_field_name = es_base_field_name.rsplit('.', 1)[0]
        is_time_series = elasticsearch_util.is_time_series(
            es_base_field_name, mapping)
        parent_is_time_series = elasticsearch_util.is_time_series(
            es_parent_field_name, mapping)
        if is_time_series:
            # The field itself is a time series: add one facet per time
            # series value, named "<base>.<value>".
            time_series_vals = elasticsearch_util.get_time_series_vals(
                es_base_field_name, mapping)
            es_field_names = [
                es_base_field_name + '.' + tsv for tsv in time_series_vals
            ]
            for es_field_name in es_field_names:
                # Keep the 'separate_panel' flag of a facet that was already
                # registered under this name; otherwise pass False.
                separate_panel = (es_field_name in facets
                                  and facets[es_field_name]['separate_panel'])
                _add_facet(es_field_name, is_time_series,
                           parent_is_time_series, True, separate_panel,
                           time_series_vals, facets, es, mapping)
        elif parent_is_time_series:
            # The field is a single entry of a time series parent.
            time_series_vals = elasticsearch_util.get_time_series_vals(
                es_parent_field_name, mapping)
            # Keep the 'time_series_panel' flag of a facet that was already
            # registered under this name; otherwise pass False.
            time_series_panel = (
                es_base_field_name in facets
                and facets[es_base_field_name]['time_series_panel'])
            _add_facet(es_base_field_name, is_time_series,
                       parent_is_time_series, time_series_panel, True,
                       time_series_vals, facets, es, mapping)
        else:
            # Neither the field nor its parent is a time series.
            _add_facet(es_base_field_name, is_time_series,
                       parent_is_time_series, False, True, [], facets, es,
                       mapping)

    # Map that follows same format as FACET_INFO in __main__.py.
    # This map is for extra facets added from the search dropdown.
    # This must be stored separately from FACET_INFO. If this were added to
    # FACET_INFO, then if user deletes extra facets chip, we wouldn't know which
    # facet to remove from FACET_INFO.
    current_app.config['EXTRA_FACET_INFO'] = facets
    return invalid_extra_facets
Example #4
0
def _process_facets(es):
    """Builds app.app.config['FACET_INFO'] from the facets listed in ui.json.

    Side effects on app.app.config:
      - NESTED_PATHS is set from elasticsearch_util.get_nested_paths(es).
      - FACET_INFO is set to the ordered facet map built here.
      - EXTRA_FACET_INFO is reset to {}.

    Args:
      es: Elasticsearch client used to read the index mapping and handed on
        to the elasticsearch_util / _add_facet helpers.

    Raises:
      EnvironmentError: If looking up a facet's elasticsearch_field_name in
        the index mapping raises KeyError.
    """
    config_path = os.path.join(app.app.config['DATASET_CONFIG_DIR'], 'ui.json')
    facets_config = _parse_json_file(config_path)['facets']

    # Preserve order, so facets are returned in same order as the config file.
    facets = OrderedDict()

    # Add a 'Samples Overview' facet if sample_file_columns were specified in
    # bigquery.json. This facet is mapped to multiple Elasticsearch facets, and
    # has keys - 'elasticsearch_field_names', 'type', 'ui_facet_name' and 'es_facet'.
    sample_file_columns = app.app.config['SAMPLE_FILE_COLUMNS']
    if sample_file_columns:
        # Construct Elasticsearch filters. See
        # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-filters-aggregation.html
        es_field_names = {}
        # Only the column names are needed. Iterating the mapping directly
        # (rather than .iteritems()) works on both Python 2 and Python 3.
        for name in sample_file_columns:
            facet_name = 'Has %s' % name
            es_field_name = 'samples._has_%s' % name.lower().replace(' ', '_')
            es_field_names[facet_name] = es_field_name
        facets['Samples Overview'] = {
            'elasticsearch_field_names': es_field_names,
            'type': 'samples_overview',
            'ui_facet_name': 'Samples Overview'
        }
        facets['Samples Overview'][
            'es_facet'] = elasticsearch_util.get_samples_overview_facet(
                es_field_names)

    app.app.config['NESTED_PATHS'] = elasticsearch_util.get_nested_paths(es)

    # Precompute mapping for getting time series values later.
    mapping = es.indices.get_mapping(index=app.app.config['INDEX_NAME'])

    for facet_config in facets_config:
        es_base_field_name = facet_config['elasticsearch_field_name']
        # Everything before the last '.'; equals the base name when the name
        # contains no '.'.
        es_parent_field_name = es_base_field_name.rsplit('.', 1)[0]
        try:
            is_time_series = elasticsearch_util.is_time_series(
                es_base_field_name, mapping)
        except KeyError:
            # Turn the bare KeyError into a message that points at ui.json.
            raise EnvironmentError(
                'Elasticsearch field name %s in ui.json not found in index' %
                es_base_field_name)
        parent_is_time_series = elasticsearch_util.is_time_series(
            es_parent_field_name, mapping)

        if is_time_series:
            # The field itself is a time series: add one facet per time
            # series value, named "<base>.<value>".
            time_series_vals = elasticsearch_util.get_time_series_vals(
                es_base_field_name, mapping)
            es_field_names = [
                es_base_field_name + '.' + tsv for tsv in time_series_vals
            ]
            for es_field_name in es_field_names:
                # Keep the 'separate_panel' flag of a facet that was already
                # registered under this name; otherwise pass False.
                separate_panel = (es_field_name in facets
                                  and facets[es_field_name]['separate_panel'])
                _add_facet(es_field_name, is_time_series,
                           parent_is_time_series, True, separate_panel,
                           facet_config, time_series_vals, facets, es, mapping)
        elif parent_is_time_series:
            # The field is a single entry of a time series parent.
            time_series_vals = elasticsearch_util.get_time_series_vals(
                es_parent_field_name, mapping)
            # Keep the 'time_series_panel' flag of a facet that was already
            # registered under this name; otherwise pass False.
            time_series_panel = (
                es_base_field_name in facets
                and facets[es_base_field_name]['time_series_panel'])
            _add_facet(es_base_field_name, is_time_series,
                       parent_is_time_series, time_series_panel, True,
                       facet_config, time_series_vals, facets, es, mapping)
        else:
            # Neither the field nor its parent is a time series.
            _add_facet(es_base_field_name, is_time_series,
                       parent_is_time_series, False, True, facet_config, [],
                       facets, es, mapping)

    # Map from Elasticsearch field name to dict with
    # - ui_facet_name: name to display in UI
    # - type: Elasticsearch field type
    # - time_series_panel: If facet is for a time series field, and is part of a
    #       larger time series facet that contains a panel for each time
    #       (https://i.imgur.com/7txytrL.png)
    # - separate_panel: If facet is either not for a time series field, or is
    #       for a time series field with its own separate panel
    #       (https://i.imgur.com/JsTb5r0.png)
    # - time_series_field: If facet is for a time series field
    # - description: optional UI facet description
    # - es_facet: Elasticsearch facet
    app.app.config['FACET_INFO'] = facets
    app.app.config['EXTRA_FACET_INFO'] = {}
Example #5
0
def _process_facets(es):
    """Builds app.app.config['FACET_INFO'] from the facets listed in ui.json.

    Side effects on app.app.config:
      - NESTED_PATHS is set from elasticsearch_util.get_nested_paths(es).
      - FACET_INFO is set to the ordered facet map built here.
      - EXTRA_FACET_INFO is reset to {}.

    Args:
      es: Elasticsearch client used to read the index mapping and handed on
        to the elasticsearch_util / _add_facet helpers.

    Raises:
      EnvironmentError: If looking up a facet's elasticsearch_field_name in
        the index mapping raises KeyError.
    """
    config = app.app.config
    ui_config = _parse_json_file(
        os.path.join(config['DATASET_CONFIG_DIR'], 'ui.json'))
    facets_config = ui_config['facets']

    # An OrderedDict keeps the facets in the order of the config file.
    facet_info = OrderedDict()

    config['NESTED_PATHS'] = elasticsearch_util.get_nested_paths(es)

    # Fetched once up front; reused for every time series lookup below.
    mapping = es.indices.get_mapping(index=config['INDEX_NAME'])

    for facet_config in facets_config:
        base_name = facet_config['elasticsearch_field_name']
        # Everything before the last '.'; equals base_name when there is no
        # '.' in it.
        parent_name = base_name.rsplit('.', 1)[0]
        try:
            base_is_ts = elasticsearch_util.is_time_series(base_name, mapping)
        except KeyError:
            # Turn the bare KeyError into a message that points at ui.json.
            raise EnvironmentError(
                'Elasticsearch field name %s in ui.json not found in index' %
                base_name)
        parent_is_ts = elasticsearch_util.is_time_series(parent_name, mapping)

        if base_is_ts:
            # The field itself is a time series: add one facet per time
            # series value, named "<base>.<value>".
            ts_vals = elasticsearch_util.get_time_series_vals(
                base_name, mapping)
            child_names = ['.'.join((base_name, tsv)) for tsv in ts_vals]
            for child_name in child_names:
                # Keep the 'separate_panel' flag of an already registered
                # facet; otherwise pass False.
                if child_name in facet_info:
                    separate_panel = facet_info[child_name]['separate_panel']
                else:
                    separate_panel = False
                _add_facet(child_name, base_is_ts, parent_is_ts, True,
                           separate_panel, facet_config, ts_vals, facet_info,
                           es, mapping)
        elif parent_is_ts:
            # The field is a single entry of a time series parent.
            ts_vals = elasticsearch_util.get_time_series_vals(
                parent_name, mapping)
            # Keep the 'time_series_panel' flag of an already registered
            # facet; otherwise pass False.
            if base_name in facet_info:
                time_series_panel = facet_info[base_name]['time_series_panel']
            else:
                time_series_panel = False
            _add_facet(base_name, base_is_ts, parent_is_ts, time_series_panel,
                       True, facet_config, ts_vals, facet_info, es, mapping)
        else:
            # Neither the field nor its parent is a time series.
            _add_facet(base_name, base_is_ts, parent_is_ts, False, True,
                       facet_config, [], facet_info, es, mapping)

    # FACET_INFO maps an Elasticsearch field name to a dict with:
    # - ui_facet_name: name to display in UI
    # - type: Elasticsearch field type
    # - time_series_panel: set if the facet is for a time series field and
    #       belongs to a larger time series facet that has one panel per time
    #       (https://i.imgur.com/7txytrL.png)
    # - separate_panel: set if the facet is not for a time series field, or
    #       is for a time series field shown in its own separate panel
    #       (https://i.imgur.com/JsTb5r0.png)
    # - time_series_field: set if the facet is for a time series field
    # - description: optional UI facet description
    # - es_facet: Elasticsearch facet
    config['FACET_INFO'] = facet_info
    config['EXTRA_FACET_INFO'] = {}