Example #1
    def test_extract_(self):
        feature_accumulator = extract_features.make_feature_accumulator()

        # Extract features from the first annotated article.
        article = corenlp_xml_reader.AnnotatedText(
            open('test/corenlp1.xml').read()
        )
        feature_accumulator.extract(article)

        # Extract features from a second article; both articles' features
        # accumulate in the same feature accumulator.
        article = corenlp_xml_reader.AnnotatedText(
            open('test/corenlp2.xml').read()
        )
        feature_accumulator.extract(article)

        # Write the accumulated (merged) features to disk.
        feature_accumulator.write('test/merged-extracted')
Example #2
    def corenlp(self):
        # CoreNLP output is not available for annotator-training or
        # PARC3-replication files.
        if self.is_parc_or_annotator_training():
            raise ArticleError(
                'Sorry, no corenlp file available for annotator-training files '
                'and parc3-replication files.\n\nTry this instead:\n\n'
                '>>> polnear.data.train()[0].corenlp()')
        return corenlp_xml_reader.AnnotatedText(
            open(self.path('corenlp')).read())
Example #3
    def test_extract(self):
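        # Parse the CoreNLP XML output for the test article.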
        article = corenlp_xml_reader.AnnotatedText(
            open('test/corenlp1.xml').read()
        )
        feature_accumulator = extract_features.make_feature_accumulator()
        feature_accumulator.extract(article)

        # Test that the features extracted are the ones expected
        self.assert_feature_like_on_disc(feature_accumulator, 'test/extracted1')
Example #4
    def test_merge_load(self):
        # Extract features from one article ...
        article = corenlp_xml_reader.AnnotatedText(
            open('test/corenlp1.xml').read()
        )
        feature_accumulator = extract_features.make_feature_accumulator()
        feature_accumulator.extract(article)

        # ... then merge in previously extracted features loaded from disk,
        # and check the result against the expected merged features.
        feature_accumulator.merge_load('test/extracted2')
        self.assert_feature_like_on_disc(
            feature_accumulator, 'test/merged-extracted')
Example #5
    def test_normalized(self):
        feature_accumulator = extract_features.make_feature_accumulator()
        article = corenlp_xml_reader.AnnotatedText(
            open('test/corenlp1.xml').read()
        )
        feature_accumulator.extract(article)

        # Collect the dependency features for every token in the dictionary.
        normalized_features = {
            token: feature_accumulator.get_features(token, ['dependency'])
            for token in feature_accumulator.dictionary.get_token_list()
        }

        # Write the features as JSON (requires `import json` at module level).
        with open('test/normalized1.json', 'w') as feature_file:
            feature_file.write(json.dumps(normalized_features))
Example #6
    def corenlp(self):
        # Parse this article's CoreNLP XML file; not every article type has one.
        if self.is_parc_or_annotator_training():
            raise ArticleError('No corenlp file available.')
        return corenlp_xml_reader.AnnotatedText(
            open(self.path('corenlp')).read())
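
For context, the call pattern shared by the examples above can be sketched as a standalone script. This is only a sketch: it assumes corenlp_xml_reader and extract_features are importable exactly as shown in the examples, and 'test/corenlp1.xml' and 'test/extracted1' are placeholder paths.

# Minimal sketch of the pattern used in the examples above.
import corenlp_xml_reader
import extract_features

# Parse a CoreNLP XML file and accumulate features from it.
article = corenlp_xml_reader.AnnotatedText(open('test/corenlp1.xml').read())
feature_accumulator = extract_features.make_feature_accumulator()
feature_accumulator.extract(article)

# Persist the accumulated features to disk.
feature_accumulator.write('test/extracted1')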