예제 #1
0
def default_only_result_and_positions_config(output_col_prefix):
    """Build an extractor config with ``get_result`` and ``get_positions`` enabled.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    settings = {
        'output_col_prefix': output_col_prefix,
        'get_result': True,
        'get_positions': True,
        'name': 'Positional result only default',
        'description': 'Get the result field and the positions',
    }
    return SparkNLPExtractorConfig(**settings)
예제 #2
0
def default_doc2chunk_config(output_col_prefix='doc2chunk'):
    """Build the extractor config labelled ``doc2chunk``.

    Both ``get_result`` and ``get_meta`` are explicitly passed as ``False``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'doc2chunk'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    config = SparkNLPExtractorConfig(
        name='doc2chunk',
        description='Converts Doc type col to chunk aka entity type',
        output_col_prefix=output_col_prefix,
        get_result=False,
        get_meta=False,
    )
    return config
예제 #3
0
def default_feature_assembler_config(output_col_prefix='feature_assembler'):
    """Build the extractor config labelled ``features_assembled``.

    Both ``get_result`` and ``get_meta`` are explicitly passed as ``False``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'feature_assembler'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    settings = {
        'output_col_prefix': output_col_prefix,
        'name': 'features_assembled',
        'description': 'Gets nothing',
        'get_result': False,
        'get_meta': False,
    }
    return SparkNLPExtractorConfig(**settings)
예제 #4
0
def default_sentiment_vivk_config(output_col_prefix='vivk_sentiment'):
    """Build the extractor config labelled ``Default sentiment vivk``.

    Enables ``get_result`` and ``get_full_meta``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'vivk_sentiment'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    # NOTE: pop_result_list=True is deliberately not passed here; the
    # original author left that option commented out.
    config = SparkNLPExtractorConfig(
        name='Default sentiment vivk',
        description='Get prediction confidence and the resulting label',
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_full_meta=True,
    )
    return config
예제 #5
0
def default_generic_classifier_config(output_col_prefix='generic_classifier'):
    """Build the extractor config labelled ``generic_classifier``.

    Enables ``get_result`` and ``get_meta`` and sets the metadata white list
    to the single key ``'confidence'``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'generic_classifier'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    allowed_meta_keys = ['confidence']
    return SparkNLPExtractorConfig(
        name='generic_classifier',
        description='Gets the  result and confidence',
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_meta=True,
        meta_white_list=allowed_meta_keys,
    )
예제 #6
0
def default_de_identification_config(output_col_prefix='de_identified'):
    """Build the extractor config for de-identification output.

    Only ``get_result`` is enabled; no metadata or position options are set.
    The ``name`` and ``description`` label this config as the
    de-identification one so it is not confused with the relation
    extraction configs.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'de_identified'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    return SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        name='default_de_identification',
        description='Gets the de-identified result',
    )
예제 #7
0
def meta_NER_config(output_col_prefix='NER'):
    """Build the extractor config labelled ``default_ner``.

    Enables ``get_result`` and ``get_meta`` and restricts the metadata white
    list to ``'confidence'``. No position options are set.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'NER'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    settings = {
        'output_col_prefix': output_col_prefix,
        'name': 'default_ner',
        'description': 'NER with IOB tags and confidences for them',
        'get_result': True,
        'get_meta': True,
        'meta_white_list': ['confidence'],
    }
    return SparkNLPExtractorConfig(**settings)
예제 #8
0
def default_yake_config(output_col_prefix='keywords'):
    """Build the extractor config labelled ``default_yake``.

    Enables ``get_result`` and ``get_meta``, white-lists the ``'score'``
    metadata key and passes ``pop_never=True``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'keywords'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    config = SparkNLPExtractorConfig(
        name='default_yake',
        description='Get all keywords and their confidences',
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_meta=True,
        meta_white_list=['score'],
        pop_never=True,
    )
    return config
예제 #9
0
def default_assertion_config(output_col_prefix='assertion'):
    """Build the extractor config labelled ``default_assertion_extraction``.

    Enables ``get_result`` and ``get_meta`` and restricts the metadata white
    list to ``'confidence'``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'assertion'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    settings = {
        'output_col_prefix': output_col_prefix,
        'name': 'default_assertion_extraction',
        'description': 'Gets the assertion result and confidence',
        'get_result': True,
        'get_meta': True,
        'meta_white_list': ['confidence'],
    }
    return SparkNLPExtractorConfig(**settings)
예제 #10
0
def default_ner_converter_config(output_col_prefix='ner_chunk'):
    """Build the extractor config for entity chunks derived from IOB tags.

    Enables ``get_result`` and ``get_meta`` and white-lists the ``'entity'``
    and ``'confidence'`` metadata keys.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'ner_chunk'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    summary = (
        'Converts IOB-NER representation into entity representation and generates confidences for the entire entity chunk'
    )
    return SparkNLPExtractorConfig(
        name='default_ner',
        description=summary,
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_meta=True,
        meta_white_list=['entity', 'confidence'],
    )
예제 #11
0
def default_classifier_dl_config(output_col_prefix='classifier_dl'):
    """Build the extractor config labelled ``default_classifier_dl``.

    Enables ``get_result`` and ``get_full_meta`` and attaches a
    ``SparkNLPExtractor`` wrapping ``extract_maximum_confidence`` as the
    ``meta_data_extractor``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'classifier_dl'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    max_confidence_extractor = SparkNLPExtractor(
        extract_maximum_confidence,
        'Instead returning confidence for each class, only return max confidence',
        'Max confidence',
    )
    return SparkNLPExtractorConfig(
        name='default_classifier_dl',
        description='Get all predicted confidences and labels',
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_full_meta=True,
        meta_data_extractor=max_confidence_extractor,
    )
예제 #12
0
def default_chunk_resolution_config(output_col_prefix='resolved_entities'):
    """Build the extractor config for chunk (entity) resolution output.

    Enables ``get_result`` and ``get_meta`` and white-lists the
    ``'confidence'`` and ``'resolved_text'`` metadata keys. The ``name`` and
    ``description`` label this config as the chunk resolution one so it is
    not confused with the NER configs.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'resolved_entities'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    return SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_meta=True,
        # Further candidate keys noted by the original author: sentence, chunk
        meta_white_list=['confidence', 'resolved_text'],
        name='default_chunk_resolution',
        description=
        'Gets the resolved entity result with the confidence and resolved_text metadata',
    )
예제 #13
0
def default_sentiment_dl_config(output_col_prefix='sentiment_dl'):
    """Build the sentiment extractor config that keeps only the top confidence.

    Enables ``get_result`` and ``get_full_meta`` and attaches a
    ``SparkNLPExtractor`` wrapping ``extract_maximum_confidence`` as the
    ``meta_data_extractor``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'sentiment_dl'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    max_confidence_extractor = SparkNLPExtractor(
        extract_maximum_confidence,
        'Instead of returining positive/negative confidence, only the maximum confidence will be returned withouth sentence number reference.',
        'Maximum binary confidence',
    )
    summary = (
        'Instead of r eturning the confidence for Postive and Negative, only the confidence of the more likely class will be returned in the confidence column'
    )
    return SparkNLPExtractorConfig(
        name='Only keep maximum sentiment confidence ',
        description=summary,
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_full_meta=True,
        meta_data_extractor=max_confidence_extractor,
    )
예제 #14
0
def default_language_classifier_config(output_col_prefix='language'):
    """Build the language classifier config that keeps only the top confidence.

    Enables ``get_result``, ``get_meta``, ``get_full_meta`` and
    ``pop_result_list``, and attaches a ``SparkNLPExtractor`` wrapping
    ``meta_extract_language_classifier_max_confidence`` as the
    ``meta_data_extractor``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'language'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    top_language_extractor = SparkNLPExtractor(
        meta_extract_language_classifier_max_confidence,
        'Extract the maximum confidence from all classified languages and drop the others. TODO top k results',
        'Keep only top language confidence',
    )
    summary = (
        'Instead of returning the confidence for every language the Classifier was traiend on, only the maximum confidence will be returned'
    )
    return SparkNLPExtractorConfig(
        name='Only keep maximum language confidence',
        description=summary,
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_meta=True,
        get_full_meta=True,
        pop_result_list=True,
        meta_data_extractor=top_language_extractor,
    )
예제 #15
0
def default_lang_classifier_config(output_col_prefix='sentiment_dl'):
    """Build the config labelled ``default_lang_classifier_config``.

    Enables ``get_result``, ``get_full_meta``, ``pop_meta_list`` and
    ``pop_result_list``, and attaches a ``SparkNLPExtractor`` wrapping
    ``extract_maximum_confidence`` as the ``meta_data_extractor``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'sentiment_dl'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    # NOTE(review): the default prefix 'sentiment_dl' looks copied from the
    # sentiment config; kept as-is because callers may rely on it - confirm.
    max_confidence_extractor = SparkNLPExtractor(
        extract_maximum_confidence,
        'Instead of returining positive/negative confidence, only the maximum confidence will be returned withouth sentence number reference.',
        'Maximum binary confidence',
    )
    summary = (
        'Instead of returning the confidence for every language, just returns the confidence of the most likely language'
    )
    return SparkNLPExtractorConfig(
        name='default_lang_classifier_config',
        description=summary,
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_full_meta=True,
        pop_meta_list=True,
        pop_result_list=True,
        meta_data_extractor=max_confidence_extractor,
    )
예제 #16
0
def default_full_config(output_col_prefix='DEFAULT'):
    """Build the extractor config labelled ``default_full``.

    Passes ``True`` for ``get_positions``, ``get_begin``, ``get_end``,
    ``get_embeds``, ``get_result``, ``get_meta``, ``get_full_meta`` and
    ``get_annotator_type``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'DEFAULT'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    enabled_flags = (
        'get_positions',
        'get_begin',
        'get_end',
        'get_embeds',
        'get_result',
        'get_meta',
        'get_full_meta',
        'get_annotator_type',
    )
    settings = {flag: True for flag in enabled_flags}
    return SparkNLPExtractorConfig(
        output_col_prefix=output_col_prefix,
        name='default_full',
        description='Default full configuration, keeps all data and gets all metadata fields',
        **settings,
    )
예제 #17
0
def default_relation_extraction_config(
        output_col_prefix='extracted_relations'):
    """Build the extractor config labelled ``default_relation_extraction``.

    Enables ``get_result`` and ``get_meta``, passes an empty metadata white
    list, and black-lists the begin/end keys of both entities.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'extracted_relations'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    excluded_position_keys = [
        'entity1_begin',
        'entity2_begin',
        'entity1_end',
        'entity2_end',
    ]
    return SparkNLPExtractorConfig(
        name='default_relation_extraction',
        description='Get relation extraction result and all metadata, positions of entities excluded',
        output_col_prefix=output_col_prefix,
        get_result=True,
        get_meta=True,
        meta_white_list=[],
        meta_black_list=excluded_position_keys,
    )
예제 #18
0
def default_only_embedding_config(output_col_prefix):
    """Build an extractor config with only ``get_embeds`` enabled.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    settings = {
        'output_col_prefix': output_col_prefix,
        'get_embeds': True,
        'name': 'Default Embed extractor',
        'description': 'Just get the Embed field',
    }
    return SparkNLPExtractorConfig(**settings)
예제 #19
0
def default_only_result_config(output_col_prefix):
    """Build an extractor config with only ``get_result`` enabled.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    config = SparkNLPExtractorConfig(
        name='Default result extractor',
        description='Just gets the result field',
        output_col_prefix=output_col_prefix,
        get_result=True,
    )
    return config
예제 #20
0
def default_document_config(output_col_prefix='document'):
    """Build an extractor config with ``get_result`` and ``pop_result_list`` on.

    No ``name`` or ``description`` is passed, so the config keeps whatever
    defaults ``SparkNLPExtractorConfig`` defines for them.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``. Defaults to ``'document'``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    settings = {
        'output_col_prefix': output_col_prefix,
        'get_result': True,
        'pop_result_list': True,
    }
    return SparkNLPExtractorConfig(**settings)
예제 #21
0
def default_get_nothing(output_col_prefix):
    """Build an extractor config that sets no ``get_*`` options at all.

    Only the prefix, ``name`` and ``description`` are passed; every other
    option keeps the default defined by ``SparkNLPExtractorConfig``.

    Args:
        output_col_prefix: Forwarded unchanged as the config's
            ``output_col_prefix``.

    Returns:
        The ``SparkNLPExtractorConfig`` created from these settings.
    """
    config = SparkNLPExtractorConfig(
        name='nothing_extractor',
        description='Extracts nothing. Useful for annotators with irrelevant data',
        output_col_prefix=output_col_prefix,
    )
    return config