def map_annotations_cols(dataframe: DataFrame, f, columns: list, output_column: str, annotatyon_type: str,
                         output_type: DataType = Annotation.arrayType()):
    """Apply ``f`` over several columns and store the result in a new column.

    The columns named in ``columns`` are packed into one array column, the
    callable produced by ``map_annotations_array(f, output_type)`` is applied
    to it, and the result is written to ``output_column`` with the
    ``annotatorType`` metadata entry set to ``annotatyon_type``.

    Parameters
    ----------
    dataframe : DataFrame
        Input DataFrame
    f : function
        Function handed to ``map_annotations_array``
    columns : list
        Names of the input columns
    output_column : str
        Name of the output column
    annotatyon_type : str
        Value stored under the ``annotatorType`` metadata key
    output_type : DataType, optional
        Output type, by default Annotation.arrayType()

    Returns
    -------
    :class:`pyspark.sql.DataFrame`
        ``dataframe`` with ``output_column`` added (or replaced)
    """
    # NOTE(review): a function with this exact name is defined again right
    # after this one in the file; the later definition wins at import time,
    # so this copy looks like dead code -- confirm and remove.
    apply_f = map_annotations_array(f, output_type)
    packed_inputs = array(*columns)
    labelled = apply_f(packed_inputs).alias(
        output_column, metadata={'annotatorType': annotatyon_type})
    return dataframe.withColumn(output_column, labelled)
def map_annotations_cols(dataframe: DataFrame, f, columns: list, output_column: str, annotatyon_type: str,
                         output_type: DataType = Annotation.arrayType()):
    """Maps a function over multiple columns of Annotation results.

    The columns named in ``columns`` are packed into one array column, the
    callable produced by ``map_annotations_array(f, output_type)`` is applied
    to it, and the result is written to ``output_column`` with the
    ``annotatorType`` metadata entry set to ``annotatyon_type``.

    Parameters
    ----------
    dataframe : DataFrame
        Input DataFrame
    f : function
        Function to apply to the annotations of the input columns
    columns : list
        Names of the input columns
    output_column : str
        Name of the output column
    annotatyon_type : str
        Annotator type, stored under the ``annotatorType`` metadata key
    output_type : DataType, optional
        Output type, by default Annotation.arrayType()

    Returns
    -------
    :class:`pyspark.sql.DataFrame`
        Transformed DataFrame

    Examples
    --------
    >>> from sparknlp.pretrained import PretrainedPipeline
    >>> from sparknlp.functions import *
    >>> explain_document_pipeline = PretrainedPipeline("explain_document_dl")
    >>> data = spark.createDataFrame([["U.N. official Ekeus heads for Baghdad."]]).toDF("text")
    >>> result = explain_document_pipeline.transform(data)
    >>> chunks_df = map_annotations_cols(
    ...     result,
    ...     lambda x: [
    ...         Annotation("tag", a.begin, a.end, a.result, a.metadata, a.embeddings)
    ...         for a in x
    ...     ],
    ...     ["pos", "ner"],
    ...     "tags",
    ...     "chunk"
    ... )
    >>> chunks_df.selectExpr("explode(tags)").show(truncate=False)
    +-------------------------------------------+
    |col                                        |
    +-------------------------------------------+
    |[tag, 0, 2, NNP, [word -> U.N], []]        |
    |[tag, 3, 3, ., [word -> .], []]            |
    |[tag, 5, 12, JJ, [word -> official], []]   |
    |[tag, 14, 18, NNP, [word -> Ekeus], []]    |
    |[tag, 20, 24, VBZ, [word -> heads], []]    |
    |[tag, 26, 28, IN, [word -> for], []]       |
    |[tag, 30, 36, NNP, [word -> Baghdad], []]  |
    |[tag, 37, 37, ., [word -> .], []]          |
    |[tag, 0, 2, B-ORG, [word -> U.N], []]      |
    |[tag, 3, 3, O, [word -> .], []]            |
    |[tag, 5, 12, O, [word -> official], []]    |
    |[tag, 14, 18, B-PER, [word -> Ekeus], []]  |
    |[tag, 20, 24, O, [word -> heads], []]      |
    |[tag, 26, 28, O, [word -> for], []]        |
    |[tag, 30, 36, B-LOC, [word -> Baghdad], []]|
    |[tag, 37, 37, O, [word -> .], []]          |
    +-------------------------------------------+
    """
    # Build the mapping callable first, then the packed input column, so the
    # evaluation order of the two helper calls matches the one-expression form.
    apply_f = map_annotations_array(f, output_type)
    packed_inputs = array(*columns)

    # The alias carries the annotator type as column metadata.
    labelled = apply_f(packed_inputs).alias(
        output_column, metadata={'annotatorType': annotatyon_type})
    return dataframe.withColumn(output_column, labelled)
def map_annotations_col(dataframe: DataFrame, f, column: str, output_column: str, annotatyon_type: str,
                        output_type: DataType = Annotation.arrayType()):
    """Maps a function over a single column of Annotation results.

    The callable produced by ``map_annotations(f, output_type)`` is applied
    to ``column`` and the result is written to ``output_column`` with the
    ``annotatorType`` metadata entry set to ``annotatyon_type``.

    Parameters
    ----------
    dataframe : DataFrame
        Input DataFrame
    f : function
        Function to apply to the column
    column : str
        Name of the input column
    output_column : str
        Name of the output column
    annotatyon_type : str
        Annotator type, stored under the ``annotatorType`` metadata key
    output_type : DataType, optional
        Output type, by default Annotation.arrayType()

    Returns
    -------
    :class:`pyspark.sql.DataFrame`
        Transformed DataFrame

    Examples
    --------
    >>> from sparknlp.pretrained import PretrainedPipeline
    >>> from sparknlp.functions import *
    >>> explain_document_pipeline = PretrainedPipeline("explain_document_dl")
    >>> data = spark.createDataFrame([["U.N. official Ekeus heads for Baghdad."]]).toDF("text")
    >>> result = explain_document_pipeline.transform(data)
    >>> chunks_df = map_annotations_col(
    ...     result,
    ...     lambda x: [
    ...         Annotation("chunk", a.begin, a.end, a.result, a.metadata, a.embeddings)
    ...         for a in x
    ...     ],
    ...     "pos",
    ...     "pos_chunk",
    ...     "chunk",
    ... )
    >>> chunks_df.selectExpr("explode(pos_chunk)").show()
    +--------------------+
    |                 col|
    +--------------------+
    |[chunk, 0, 2, NNP...|
    |[chunk, 3, 3, ., ...|
    |[chunk, 5, 12, JJ...|
    |[chunk, 14, 18, N...|
    |[chunk, 20, 24, V...|
    |[chunk, 26, 28, I...|
    |[chunk, 30, 36, N...|
    |[chunk, 37, 37, ....|
    +--------------------+
    """
    apply_f = map_annotations(f, output_type)

    # The alias carries the annotator type as column metadata.
    labelled = apply_f(column).alias(
        output_column, metadata={'annotatorType': annotatyon_type})
    return dataframe.withColumn(output_column, labelled)