def default_only_result_and_positions_config(output_col_prefix):
    """Build a config that requests the result field together with positions.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``.

    Returns:
        A ``SparkNLPExtractorConfig`` with ``get_result`` and
        ``get_positions`` switched on.
    """
    settings = dict(
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_positions=True,
        name='Positional result only default',
        description='Get the result field and the positions',
    )
    return SparkNLPExtractorConfig(**settings)
def default_doc2chunk_config(output_col_prefix='doc2chunk'):
    """Build the default config for doc2chunk output.

    Both ``get_result`` and ``get_meta`` are explicitly switched off.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'doc2chunk'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    config = SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=False,
        name='doc2chunk',
        get_meta=False,
        description='Converts Doc type col to chunk aka entity type',
    )
    return config
def default_feature_assembler_config(output_col_prefix='feature_assembler'):
    """Build the default config for feature-assembler output.

    Neither the result nor the metadata is requested.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'feature_assembler'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    settings = {
        'output_col_prefix': output_col_prefix,
        'get_result': False,
        'name': 'features_assembled',
        'get_meta': False,
        'description': 'Gets nothing',
    }
    return SparkNLPExtractorConfig(**settings)
def default_sentiment_vivk_config(output_col_prefix='vivk_sentiment'):
    """Build the default config for vivk sentiment output.

    Requests the result field and the full metadata.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'vivk_sentiment'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    settings = dict(
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_full_meta=True,
        # pop_result_list=True is left disabled here.
        name='Default sentiment vivk',
        description='Get prediction confidence and the resulting label',
    )
    return SparkNLPExtractorConfig(**settings)
def default_generic_classifier_config(output_col_prefix='generic_classifier'):
    """Build the default config for generic-classifier output.

    Requests the result plus metadata, white-listing only ``'confidence'``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'generic_classifier'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    kept_meta_keys = ['confidence']
    config = SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        name='generic_classifier',
        get_meta=True,
        meta_white_list=kept_meta_keys,
        description='Gets the result and confidence',
    )
    return config
def default_de_identification_config(output_col_prefix='de_identified'):
    """Build the default config for de-identification output.

    Only the result field is requested; no metadata or position flags
    are set.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'de_identified'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    # The name and description label this config as de-identification;
    # they must not describe relation extraction, which has its own config.
    return SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        name='default_de_identification',
        description='Gets the de-identified result',
    )
def meta_NER_config(output_col_prefix='NER'):
    """Build the NER config: result plus the ``'confidence'`` metadata entry.

    No position flags are set, so only the tags and their confidences are
    requested.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'NER'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    settings = dict(
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_meta=True,
        meta_white_list=['confidence'],
        name='default_ner',
        description='NER with IOB tags and confidences for them',
    )
    return SparkNLPExtractorConfig(**settings)
def default_yake_config(output_col_prefix='keywords'):
    """Build the default config for YAKE keyword output.

    Requests the result plus metadata, white-listing only ``'score'``,
    and passes ``pop_never=True``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'keywords'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    kept_meta_keys = ['score']
    config = SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        name='default_yake',
        get_meta=True,
        meta_white_list=kept_meta_keys,
        description='Get all keywords and their confidences',
        pop_never=True,
    )
    return config
def default_assertion_config(output_col_prefix='assertion'):
    """Build the default config for assertion output.

    Requests the result plus metadata, white-listing only ``'confidence'``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'assertion'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    settings = {
        'output_col_prefix': output_col_prefix,
        'get_result': True,
        'name': 'default_assertion_extraction',
        'get_meta': True,
        'meta_white_list': ['confidence'],
        'description': 'Gets the assertion result and confidence',
    }
    return SparkNLPExtractorConfig(**settings)
def default_ner_converter_config(output_col_prefix='ner_chunk'):
    """Build the default config for NER-converter (entity chunk) output.

    Requests the result plus metadata, white-listing ``'entity'`` and
    ``'confidence'``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'ner_chunk'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    kept_meta_keys = ['entity', 'confidence']
    summary = 'Converts IOB-NER representation into entity representation and generates confidences for the entire entity chunk'
    return SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        name='default_ner',
        get_meta=True,
        meta_white_list=kept_meta_keys,
        description=summary,
    )
def default_classifier_dl_config(output_col_prefix='classifier_dl'):
    """Build the default config for ClassifierDL output.

    Requests the result and the full metadata, and attaches a metadata
    extractor wrapping ``extract_maximum_confidence``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'classifier_dl'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    max_confidence_extractor = SparkNLPExtractor(
        extract_maximum_confidence,
        'Instead returning confidence for each class, only return max confidence',
        'Max confidence',
    )
    return SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_full_meta=True,
        name='default_classifier_dl',
        description='Get all predicted confidences and labels',
        meta_data_extractor=max_confidence_extractor,
    )
def default_chunk_resolution_config(output_col_prefix='resolved_entities'):
    """Build the default config for chunk-resolution output.

    Requests the result plus metadata, white-listing ``'confidence'`` and
    ``'resolved_text'``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'resolved_entities'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    # The name and description label this config as chunk resolution so it
    # is distinguishable from the NER and NER-converter configs.
    return SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_meta=True,
        # Other metadata keys noted as candidates: sentence, chunk
        meta_white_list=['confidence', 'resolved_text'],
        name='default_chunk_resolution',
        description='Gets the resolved entities with their confidence and resolved text',
    )
def default_sentiment_dl_config(output_col_prefix='sentiment_dl'):
    """Build the default config for SentimentDL output.

    Requests the result and the full metadata, and attaches a metadata
    extractor wrapping ``extract_maximum_confidence``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'sentiment_dl'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    # Runtime strings are kept verbatim, including their spelling and the
    # trailing space in the name.
    max_confidence_extractor = SparkNLPExtractor(
        extract_maximum_confidence,
        'Instead of returining positive/negative confidence, only the maximum confidence will be returned withouth sentence number reference.',
        'Maximum binary confidence',
    )
    settings = dict(
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_full_meta=True,
        name='Only keep maximum sentiment confidence ',
        description='Instead of r eturning the confidence for Postive and Negative, only the confidence of the more likely class will be returned in the confidence column',
        meta_data_extractor=max_confidence_extractor,
    )
    return SparkNLPExtractorConfig(**settings)
def default_language_classifier_config(output_col_prefix='language'):
    """Build the default config for language-classifier output.

    Requests the result, metadata and full metadata, sets
    ``pop_result_list``, and attaches a metadata extractor wrapping
    ``meta_extract_language_classifier_max_confidence``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'language'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    # Runtime strings are kept verbatim, including their spelling.
    top_language_extractor = SparkNLPExtractor(
        meta_extract_language_classifier_max_confidence,
        'Extract the maximum confidence from all classified languages and drop the others. TODO top k results',
        'Keep only top language confidence',
    )
    config = SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_meta=True,
        get_full_meta=True,
        pop_result_list=True,
        name='Only keep maximum language confidence',
        description='Instead of returning the confidence for every language the Classifier was traiend on, only the maximum confidence will be returned',
        meta_data_extractor=top_language_extractor,
    )
    return config
def default_lang_classifier_config(output_col_prefix='sentiment_dl'):
    """Build a language-classifier config that keeps only the top confidence.

    Requests the result and the full metadata, sets both ``pop_meta_list``
    and ``pop_result_list``, and attaches a metadata extractor wrapping
    ``extract_maximum_confidence``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'sentiment_dl'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    # NOTE(review): the default prefix 'sentiment_dl' and the extractor texts
    # below mention sentiment, not language - looks copied from the sentiment
    # config; confirm with callers before changing.
    max_confidence_extractor = SparkNLPExtractor(
        extract_maximum_confidence,
        'Instead of returining positive/negative confidence, only the maximum confidence will be returned withouth sentence number reference.',
        'Maximum binary confidence',
    )
    settings = dict(
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_full_meta=True,
        pop_meta_list=True,
        pop_result_list=True,
        name='default_lang_classifier_config',
        description='Instead of returning the confidence for every language, just returns the confidence of the most likely language',
        meta_data_extractor=max_confidence_extractor,
    )
    return SparkNLPExtractorConfig(**settings)
def default_full_config(output_col_prefix='DEFAULT'):
    """Build the full config with every ``get_*`` flag listed here enabled.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'DEFAULT'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    # Every flag in this tuple is passed as True, in this order.
    enabled_flags = dict.fromkeys(
        (
            'get_positions',
            'get_begin',
            'get_end',
            'get_embeds',
            'get_result',
            'get_meta',
            'get_full_meta',
            'get_annotator_type',
        ),
        True,
    )
    return SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        **enabled_flags,
        name='default_full',
        description='Default full configuration, keeps all data and gets all metadata fields',
    )
def default_relation_extraction_config(
        output_col_prefix='extracted_relations'):
    """Build the default config for relation-extraction output.

    Requests the result plus metadata with an empty white list, and
    black-lists the four entity begin/end metadata keys.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'extracted_relations'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    entity_position_keys = [
        'entity1_begin',
        'entity2_begin',
        'entity1_end',
        'entity2_end',
    ]
    config = SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        meta_white_list=[],
        get_meta=True,
        meta_black_list=entity_position_keys,
        name='default_relation_extraction',
        description='Get relation extraction result and all metadata, positions of entities excluded',
    )
    return config
def default_only_embedding_config(output_col_prefix):
    """Build a config that requests only the embeddings field.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``.

    Returns:
        A ``SparkNLPExtractorConfig`` with ``get_embeds`` switched on.
    """
    settings = {
        'output_col_prefix': output_col_prefix,
        'get_embeds': True,
        'name': 'Default Embed extractor',
        'description': 'Just get the Embed field',
    }
    return SparkNLPExtractorConfig(**settings)
def default_only_result_config(output_col_prefix):
    """Build a config that requests only the result field.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``.

    Returns:
        A ``SparkNLPExtractorConfig`` with ``get_result`` switched on.
    """
    config = SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        name='Default result extractor',
        description='Just gets the result field',
    )
    return config
def default_document_config(output_col_prefix='document'):
    """Build the default config for document output.

    Requests the result field and sets ``pop_result_list``; no name or
    description is supplied.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'document'``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    settings = dict(
        output_col_prefix=output_col_prefix,
        pop_result_list=True,
        get_result=True,
    )
    return SparkNLPExtractorConfig(**settings)
def default_get_nothing(output_col_prefix):
    """Build a config that sets no ``get_*`` flag at all.

    Only the prefix, name and description are passed; every other option
    is left at whatever ``SparkNLPExtractorConfig`` defaults to.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``.

    Returns:
        The resulting ``SparkNLPExtractorConfig``.
    """
    config = SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        name='nothing_extractor',
        description='Extracts nothing. Useful for annotators with irrelevant data',
    )
    return config