class DiseaseExtractor(object):
    """Find known disease names in text with a token trie.

    The trie is obtained via ``TokenTrie(name="disease")``. When that trie
    reports itself empty, it is populated from ``data/UMLS/diseases.txt``
    (resolved relative to this module) and then ``save_to_cache()`` is
    called -- presumably so later instantiations can skip the rebuild.
    """

    def __init__(self):
        """Create the "disease" trie, filling it from the data file if empty."""
        self.trie = TokenTrie(name="disease")
        if self.trie.is_empty():
            diseases_path = os.path.join(
                os.path.dirname(__file__), 'data', 'UMLS', 'diseases.txt')
            # The context manager closes the handle even if reading fails;
            # the original left the file open.
            with open(diseases_path) as diseases_file:
                disease_names = diseases_file.read().split('\n')
            for disease in disease_names:
                if not disease.strip():
                    # A trailing newline in the file produces an empty
                    # entry; do not insert an empty token sequence.
                    continue
                self.trie.add(tokenize(disease))
                # Progress output; the parenthesised form prints the same
                # text on Python 2 and is also valid on Python 3.
                print(disease)
            self.trie.save_to_cache()

    def extract(self, text):
        """Return the trie matches found in ``text`` as strings.

        ``text`` is tokenized, scanned against the trie, and every token
        sequence returned by the scan is joined with single spaces.
        """
        sequences = self.trie.scan(tokenize(text))
        # assemble each matched token sequence back into one string
        return [" ".join(seq) for seq in sequences]
class SymptomExtractor(object):
    """Find known symptom names in text with a token trie.

    The trie is obtained via ``TokenTrie(name="symptoms")``. When that trie
    reports itself empty, it is populated from a tab-separated
    ``code<TAB>name`` file and then ``save_to_cache()`` is called --
    presumably so later instantiations can skip the rebuild.
    """

    def __init__(self):
        """Create the "symptoms" trie, filling it from the data file if empty."""
        self.trie = TokenTrie(name="symptoms")
        if self.trie.is_empty():
            # NOTE(review): this reads 'diseases.txt' although the trie is
            # named "symptoms" -- looks like a copy-paste slip; confirm the
            # intended data file. Path kept unchanged here.
            symptoms_path = os.path.join(
                os.path.dirname(__file__), 'data', 'UMLS', 'diseases.txt')
            # The context manager closes the handle even if reading fails;
            # the original left the file open.
            with open(symptoms_path) as symptoms_file:
                symptoms = symptoms_file.read().split('\n')
            for symptom in symptoms:
                if not symptom.strip():
                    # A trailing newline in the file produces an empty
                    # entry, which would make the two-field unpack below
                    # raise ValueError.
                    continue
                # Non-blank lines must still have exactly two tab-separated
                # fields; malformed data raises ValueError as before.
                _code, name = symptom.split("\t")
                self.trie.add(tokenize(name))
                # Progress output; the parenthesised form prints the same
                # text on Python 2 and is also valid on Python 3.
                print(name)
            self.trie.save_to_cache()

    def extract(self, text):
        """Return the trie matches found in ``text`` as strings.

        ``text`` is tokenized, scanned against the trie, and every token
        sequence returned by the scan is joined with single spaces.
        """
        sequences = self.trie.scan(tokenize(text))
        # assemble each matched token sequence back into one string
        return [" ".join(seq) for seq in sequences]
Example #3
0
class DiseaseExtractor(object):
    """Extract disease mentions from text by scanning a token trie.

    Uses ``TokenTrie(name="disease")``. If the trie is empty on
    construction, each non-blank line of ``data/UMLS/diseases.txt``
    (located beside this module) is tokenized and added, after which
    ``save_to_cache()`` is invoked.
    """

    def __init__(self):
        """Set up ``self.trie``, building it from the data file when empty."""
        self.trie = TokenTrie(name="disease")
        if not self.trie.is_empty():
            return

        data_path = os.path.join(
            os.path.dirname(__file__), 'data', 'UMLS', 'diseases.txt')
        # Read inside a ``with`` block so the file is always closed; the
        # original never closed the handle.
        with open(data_path) as diseases_file:
            lines = diseases_file.read().split('\n')

        for disease in lines:
            # The split leaves an empty string after a final newline;
            # skip blank entries instead of adding an empty sequence.
            if disease.strip():
                self.trie.add(tokenize(disease))
                # Same output as the Python 2 print statement, but also
                # parses under Python 3.
                print(disease)
        self.trie.save_to_cache()

    def extract(self, text):
        """Scan ``text`` and return each matched token sequence as a string.

        Every sequence yielded by ``self.trie.scan`` over the tokenized
        text is joined with single spaces.
        """
        matches = self.trie.scan(tokenize(text))
        return [" ".join(tokens) for tokens in matches]
Example #4
0
class SymptomExtractor(object):
    """Extract symptom mentions from text by scanning a token trie.

    Uses ``TokenTrie(name="symptoms")``. If the trie is empty on
    construction, the name column of each non-blank ``code<TAB>name``
    line in the data file is tokenized and added, after which
    ``save_to_cache()`` is invoked.
    """

    def __init__(self):
        """Set up ``self.trie``, building it from the data file when empty."""
        self.trie = TokenTrie(name="symptoms")
        if not self.trie.is_empty():
            return

        # NOTE(review): the file opened is 'diseases.txt' even though this
        # is the symptoms trie -- likely a copy-paste error; verify the
        # intended filename. Left unchanged here.
        data_path = os.path.join(
            os.path.dirname(__file__), 'data', 'UMLS', 'diseases.txt')
        # Read inside a ``with`` block so the file is always closed; the
        # original never closed the handle.
        with open(data_path) as symptoms_file:
            lines = symptoms_file.read().split('\n')

        for symptom in lines:
            # The split leaves an empty string after a final newline,
            # which previously crashed the two-field unpack with
            # ValueError; skip blank entries.
            if not symptom.strip():
                continue
            # Exactly two tab-separated fields are still required for
            # non-blank lines; the code field is not used.
            _code, name = symptom.split("\t")
            self.trie.add(tokenize(name))
            # Same output as the Python 2 print statement, but also
            # parses under Python 3.
            print(name)
        self.trie.save_to_cache()

    def extract(self, text):
        """Scan ``text`` and return each matched token sequence as a string.

        Every sequence yielded by ``self.trie.scan`` over the tokenized
        text is joined with single spaces.
        """
        matches = self.trie.scan(tokenize(text))
        return [" ".join(tokens) for tokens in matches]