import warnings
from typing import Any, Dict, Optional

import pandas as pd

# Package-internal imports; module paths assume the text_extensions_for_pandas
# package layout this snippet was taken from.
from text_extensions_for_pandas.array.span import SpanArray
from text_extensions_for_pandas.io.watson import util


def _make_headers_df(headers_response):
    """
    Parse the headers portion of a Watson response and create the header
    DataFrame.

    :param headers_response: the ``row_header`` or ``column_header`` array as
        returned in the Watson response
    :return: the completed header DataFrame
    """

    headers_df = util.make_dataframe(headers_response)
    headers_df = headers_df[[
        "text", "column_index_begin", "column_index_end", "row_index_begin",
        "row_index_end", "cell_id", "text_normalized"
    ]]
    return headers_df
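
# Usage sketch (hypothetical data): a ``row_header`` entry from a Watson
# table-understanding response carries the text, normalized text, cell id,
# and row/column index ranges, e.g.
#
#     headers = [{
#         "text": "Revenue", "text_normalized": "revenue",
#         "cell_id": "rowHeader-0",
#         "column_index_begin": 0, "column_index_end": 0,
#         "row_index_begin": 1, "row_index_end": 1,
#     }]
#     _make_headers_df(headers)  # -> one-row DataFrame with the seven columns above
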
def _make_body_cells_df(body_cells_response):
    """
    Parse the ``body_cells`` portion of a Watson response and create the
    body cells DataFrame.

    :param body_cells_response: the ``body_cells`` array as returned in the
        Watson response
    :return: the completed body cells DataFrame
    """
    body_cells_df = util.make_dataframe(body_cells_response)
    if not "attributes.type" in body_cells_df.columns.to_list():
        body_cells_df["attributes.type"] = None
        body_cells_df["attributes.text"] = None
    body_cells_df = body_cells_df[[
        "text", "column_index_begin", "column_index_end", "row_index_begin",
        "row_index_end", "cell_id", "column_header_ids", "column_header_texts",
        "row_header_ids", "row_header_texts", "attributes.text",
        "attributes.type"
    ]]
    return body_cells_df
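
# Usage sketch (hypothetical data): a body cell links back to its headers by
# id and text; "attributes" may be absent, in which case the placeholder
# columns added above are filled with None.
#
#     cells = [{
#         "text": "42", "cell_id": "bodyCell-1-1",
#         "column_index_begin": 1, "column_index_end": 1,
#         "row_index_begin": 1, "row_index_end": 1,
#         "column_header_ids": ["colHeader-0"], "column_header_texts": ["Q1"],
#         "row_header_ids": ["rowHeader-0"], "row_header_texts": ["Revenue"],
#     }]
#     _make_body_cells_df(cells)

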
def parse_response(
        response: Dict[str, Any],
        original_text: Optional[str] = None,
        apply_standard_schema: bool = False) -> Dict[str, pd.DataFrame]:
    """
    Parse a Watson NLU response as a decoded JSON string, e.g. dictionary containing
    requested features and convert into a dict of Pandas DataFrames. The following
    features in the response will be converted:
        * entities
        * entity_mentions (elements of the "mentions" field of `response["entities"]`)
        * keywords
        * relations
        * semantic_roles
        * syntax

    For information on getting started with Watson Natural Language Understanding on
    IBM Cloud, see
    https://cloud.ibm.com/docs/natural-language-understanding?topic=natural-language-understanding-getting-started.
    A Python SDK for authentication and making requests to the service is provided at
    https://github.com/watson-developer-cloud/python-sdk.  Details on the supported
    features and available options when making the request can be found at
    https://cloud.ibm.com/apidocs/natural-language-understanding?code=python#analyze-text.

    .. note:: Additional feature data in the response will not be processed.

    >>> response = natural_language_understanding.analyze(
    ...     url="https://raw.githubusercontent.com/CODAIT/text-extensions-for-pandas/master/resources/holy_grail.txt",
    ...     return_analyzed_text=True,
    ...     features=Features(
    ...         entities=EntitiesOptions(sentiment=True),
    ...         keywords=KeywordsOptions(sentiment=True, emotion=True),
    ...         relations=RelationsOptions(),
    ...         semantic_roles=SemanticRolesOptions(),
    ...         syntax=SyntaxOptions(sentences=True, tokens=SyntaxOptionsTokens(lemma=True, part_of_speech=True))
    ...     )).get_result()
    >>> dfs = parse_response(response)
    >>> dfs.keys()
    dict_keys(['syntax', 'entities', 'keywords', 'relations', 'semantic_roles'])
    >>> dfs["syntax"].head()
                        span  part_of_speech   lemma  \
    0    [0, 5): 'Monty'           PROPN    None
    1  [6, 12): 'Python'           PROPN  python

                                                sentence
    0  [0, 273): 'Monty Python and the Holy Grail is ...
    1  [0, 273): 'Monty Python and the Holy Grail is ...

    :param response: A dictionary of features from the IBM Watson NLU response
    :param original_text: Optional original text sent in the request; if None,
                          the function looks for the "analyzed_text" field in
                          the response
    :param apply_standard_schema: Return DataFrames with a set schema, whether data
                                  was present in the response or not
    :return: A dictionary mapping feature name to a Pandas DataFrame
    """
    dfs = {}

    if original_text is None and "analyzed_text" in response:
        original_text = response["analyzed_text"]

    # Create the syntax DataFrame
    syntax_response = response.get("syntax", {})
    token_df, sentence_df = _make_syntax_dataframes(syntax_response,
                                                    original_text)
    sentence_series = sentence_df.get("sentence_span")
    if sentence_series is not None:
        syntax_df = _merge_syntax_dataframes(token_df, sentence_series)
    else:
        syntax_df = pd.concat([token_df, sentence_df], axis=1)
    dfs["syntax"] = util.apply_schema(syntax_df, _syntax_schema,
                                      apply_standard_schema)

    # If the caller did not supply the original text, try to recover it from
    # the target text of the syntax spans. Use .array to get the underlying
    # extension array; the column itself is a pd.Series, never a SpanArray.
    if original_text is None and "span" in dfs["syntax"].columns:
        char_span = dfs["syntax"]["span"].array
        if isinstance(char_span, SpanArray):
            original_text = char_span.target_text
        else:
            warnings.warn("Did not receive and could not build original text")

    # Create the entities DataFrames
    entities = response.get("entities", [])
    entities_df, entity_mentions_df = _make_entity_dataframes(
        entities, original_text)
    dfs["entities"] = util.apply_schema(entities_df, _entities_schema,
                                        apply_standard_schema)
    dfs["entity_mentions"] = util.apply_schema(entity_mentions_df,
                                               _entity_mentions_schema,
                                               apply_standard_schema)

    # Create the keywords DataFrame
    keywords = response.get("keywords", [])
    keywords_df = util.make_dataframe(keywords)
    dfs["keywords"] = util.apply_schema(keywords_df, _keywords_schema,
                                        apply_standard_schema)

    # Create the relations DataFrame
    relations = response.get("relations", [])
    relations_df = _make_relations_dataframe(relations, original_text,
                                             sentence_series)
    dfs["relations"] = util.apply_schema(relations_df, _relations_schema,
                                         apply_standard_schema)

    # Create the semantic roles DataFrame
    semantic_roles = response.get("semantic_roles", [])
    semantic_roles_df = util.make_dataframe(semantic_roles)
    dfs["semantic_roles"] = util.apply_schema(semantic_roles_df,
                                              _semantic_roles_schema,
                                              apply_standard_schema)

    if "warnings" in response:
        # TODO: check structure of warnings and improve message
        warnings.warn(str(response["warnings"]))

    return dfs
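

if __name__ == "__main__":
    # Minimal usage sketch, assuming an NLU response previously saved to disk;
    # the file name "nlu_response.json" is hypothetical. In live use, the
    # dictionary returned by the Watson SDK's .get_result() (as in the
    # docstring example above) is passed directly to parse_response.
    import json

    with open("nlu_response.json", encoding="utf-8") as f:
        saved_response = json.load(f)

    frames = parse_response(saved_response, apply_standard_schema=True)
    for feature_name, feature_df in frames.items():
        print(feature_name, feature_df.shape)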