예제 #1
0
 def save(self, *args, **kwargs):
     """Save the project, attaching an empty concept DB when none is set.

     If ``self.concept_db`` is ``None``, a fresh ``CDB`` is dumped to
     ``empty_cdb.dat`` (relative to the current working directory), the
     file is stored on a new ``ConceptDB`` record named
     ``<project name>_empty_cdb``, that record is flagged
     ``use_for_training`` and saved, and it becomes this project's
     ``concept_db``. All positional and keyword arguments are forwarded
     unchanged to the parent class's ``save``.
     """
     if self.concept_db is None:
         cdb = CDB()
         cdb.save_dict('empty_cdb.dat')
         cdb_obj = ConceptDB()
         cdb_obj.name = f'{self.name}_empty_cdb'
         # Use a context manager so the handle is closed as soon as the
         # file field has stored the content (previously it was leaked).
         with open('empty_cdb.dat', 'rb') as f:
             cdb_obj.cdb_file.save(f'{self.name}_empty_cdb.dat', File(f))
         cdb_obj.use_for_training = True
         cdb_obj.save()
         self.concept_db = cdb_obj
     super(ProjectAnnotateEntities, self).save(*args, **kwargs)
# NOTE(review): `vocab` and `medcat_path` are not defined in this snippet;
# they are presumably created earlier in the file - confirm before running.

# Load the vocab model you just downloaded
vocab.load_dict(os.path.join(medcat_path, 'med_ann_norm_dict.dat'))

# Start from an empty CDB. If you already have a saved one, load it instead:
#   cdb.load_dict(os.path.join(medcat_path, 'simple_cdb.csv'))
cdb = CDB()


# If you need a special CDB you can build one from one or more .csv files.
# Add further paths (e.g. 'simple_cdb.csv') to this list as needed.
preparator = PrepareCDB(vocab=vocab)
csv_paths = [os.path.join(medcat_path, 'attention_cdb.csv')]
cdb = preparator.prepare_csvs(csv_paths)

# Save the new CDB for later
cdb.save_dict(os.path.join(medcat_path, 'simple_cdb.cdb'))

# To annotate documents we do
doc = "My simple document with kidney failure"
cat = CAT(cdb=cdb, vocab=vocab)
cat.train = False  # annotate only
doc_spacy = cat(doc)
# Entities are in (bare expression: only displays a value in a REPL/notebook)
doc_spacy._.ents
# Or to get a json
doc_json = cat.get_json(doc)

# To have a look at the results:
from spacy import displacy
# Note that this will not show all entities, but only the longest ones
displacy.serve(doc_spacy, style='ent')