예제 #1
0
class TaggerTestCase(unittest.TestCase):
    """Smoke tests that run ``BioEntityTagger`` over sample abstract files.

    Each test reads a resource file line by line, tags the lower-cased line
    and prints the matches. The tests make no assertions; they only fail if
    reading or tagging raises.
    """

    def setUp(self):
        """Create a tagger with default arguments before every test."""
        self.tagger = BioEntityTagger()

    def testTaggerNLP(self):
        """Tag every line of the NLP sample file and print each match."""
        # ``with open`` closes the handle deterministically; the previous
        # ``file(...)`` call leaked it, and ``file`` is gone in Python 3.
        with open('resources/test_abstract_nlp.txt') as abstracts:
            for i, text in enumerate(abstracts):
                print(i)
                # The tagger sees lower-cased text, but the returned offsets
                # are used to slice the original-case line.
                for tag in self.tagger.tag(text.lower()):
                    print('{} {}'.format(tag, text[tag['start']:tag['end']]))

    def testTaggerLexebi(self):
        """Print, per line, the matched strings that only LEXEBI tags found.

        Matched strings are split by ``tag['reference_db']``: those equal to
        ``'LEXEBI'`` versus everything else. The strings present only in the
        LEXEBI group are reported as new.
        """
        with open('resources/test_abstract_lexebi.txt') as abstracts:
            for i, text in enumerate(abstracts):
                print(i)
                old_tags = set()
                lexebi_tags = set()
                for tag in self.tagger.tag(text.lower()):
                    matched_text = text[tag['start']:tag['end']]
                    print('{} {}'.format(tag, matched_text))
                    if tag['reference_db'] == 'LEXEBI':
                        lexebi_tags.add(matched_text)
                    else:
                        old_tags.add(matched_text)
                new_tags = lexebi_tags - old_tags
                print('New tags identified : {}'.format(new_tags))
예제 #2
0
    def init_models(self):
        """Initialise the NLP model, the entity tagger and the analyzers.

        On success this sets ``self.nlp``, ``self._tagger`` and
        ``self.analyzers``. If any step raises, the failure is logged with
        its traceback, ``self.analyzers`` is set to an empty list and the
        exception is not re-raised. The list of steps reached is logged at
        INFO level in both cases.
        """
        steps_done = []

        try:
            # NOTE: the TextBlob lite corpora download that used to run here
            # (nltk.download of 'brown', 'punkt', 'wordnet' and
            # 'averaged_perceptron_tagger') is disabled.
            steps_done.append('STARTING NLPAnalysis')
            self.nlp = NLPAnalysis._init_spacy_english_language()
            steps_done.append('STARTING TAGGER')
            self._tagger = BioEntityTagger(partial_match=False)
            self.analyzers = [
                DocumentAnalysisSpacy(self.nlp, tagger=self._tagger),
                NounChuncker()
            ]
            steps_done.append('NLP MODEL INITIALIZED')
        except Exception:
            # Deliberately broad so that any initialisation error degrades to
            # "no analyzers", but not a bare ``except``: KeyboardInterrupt and
            # SystemExit must still propagate.
            logging.exception('NLP MODEL INIT FAILED MISERABLY')
            steps_done.append('NLP MODEL INIT FAILED MISERABLY')
            self.analyzers = []

        logging.info(steps_done)
예제 #3
0
 def setUpClass(cls):
     """Attach the shared fixtures to the class.

     ``cls.nlp`` receives the result of ``init_spacy_english_language()``
     and ``cls.tagger`` a ``BioEntityTagger`` built with
     ``partial_match=False``.
     """
     language_model = init_spacy_english_language()
     cls.nlp = language_model
     entity_tagger = BioEntityTagger(partial_match=False)
     cls.tagger = entity_tagger
예제 #4
0
 def init_tagger(self):
     """Store a ``BioEntityTagger`` built with default arguments on ``self.tagger``."""
     default_tagger = BioEntityTagger()
     self.tagger = default_tagger
예제 #5
0
 def setUp(self):
     """Give the instance a ``BioEntityTagger`` built with default arguments."""
     fresh_tagger = BioEntityTagger()
     self.tagger = fresh_tagger