Exemple #1
0
def serialize_doc(doc: Doc) -> Dict[str, any]:
    """Build a plain-dict representation of a ``Doc``.

    NOTE(review): the return annotation uses the builtin ``any`` rather than
    ``typing.Any``. It is left untouched here because the file's import
    block is not visible from this function -- confirm and switch to ``Any``.

    Args:
        doc: The document to serialize.

    Returns:
        A dict with four entries:

        * ``'text'``: ``doc.text``.
        * ``'tokens'``: every item yielded by iterating ``doc``, passed
          through ``serialize_token``.
        * ``'noun_chunks'``: every item of ``doc.noun_chunks``, passed
          through ``serialize_span``.
        * ``'data'``: the result of ``doc.to_json()``.
    """
    return {
        'text': doc.text,
        'tokens': [serialize_token(token) for token in doc],
        'noun_chunks': [serialize_span(chunk) for chunk in doc.noun_chunks],
        'data': doc.to_json(),
    }
Exemple #2
0
    def encode(cls, obj: Doc) -> str:
        """Serialize a Doc to a JSON string.

        The base payload is ``obj.to_json()``. When
        ``obj._.has("huggingface_neuralcoref")`` is true, a ``"neuralcoref"``
        entry describing every cluster in ``obj._.coref_clusters`` is merged
        into that payload before it is dumped.

        Args:
            obj: The Doc to encode.

        Returns:
            The payload encoded with ``json.dumps``.
        """
        payload = obj.to_json()

        # Nothing to add when the neuralcoref extension is not reported.
        if not obj._.has("huggingface_neuralcoref"):
            return json.dumps(payload)

        def _span_record(span):
            """Reduce a Span to its start, end and text."""
            return {
                "start": span.start,
                "end": span.end,
                "text": span.text,
            }

        def _cluster_record(cluster):
            """Reduce a neuralcoref Cluster to its index, main span and mentions."""
            return {
                "i": cluster.i,
                "main": _span_record(cluster.main),
                "mentions": list(map(_span_record, cluster.mentions)),
            }

        coref_payload = {
            "neuralcoref": list(map(_cluster_record, obj._.coref_clusters)),
        }

        # Fold the coreference data into the Doc's JSON, then dump the result.
        return json.dumps(tz.merge(payload, coref_payload))