Esempi in Python per Vocabulary.readPostProcessingVoc

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: Vocabulary

Classe/tipologia: Vocabulary

Metodo/funzione: readPostProcessingVoc

Esempi su hotexamples.com: 2

Vocabulary.readPostProcessingVoc in Python: 2 esempi trovati. Questi sono i migliori esempi reali in Python per Vocabulary.Vocabulary.readPostProcessingVoc, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

Vocabulary(30)

add_token(5)

load(5)

add_word(5)

save(3)

get_vocab(3)

from_serializable(3)

get_word(3)

index(2)

build_from_token(2)

make_vocab_charts(2)

readPostProcessingVoc(2)

get_index(2)

getIndex(2)

fetch(2)

addSentence(1)

load_bigquery_vocab_from_indexed(1)

load_vocab_from_local(1)

load_word_from_data(1)

make_array_of_words_from_sentences(1)

prune(1)

restore_text(1)

add_sentence_pair(1)

save_dict(1)

loadIndexFile(1)

sentence2indices(1)

similar(1)

size(1)

sorted_tokens(1)

startSymbolWordID(1)

symbol(1)

text2ids(1)

to_index(1)

unknownWordID(1)

sentence2index(1)

incrementDF(1)

addSymbol(1)

from_serialiable(1)

add_words(1)

build_vocabulary(1)

checkIndex(1)

create(1)

create_from_text(1)

de_tokenize_data(1)

endSymbolWordID(1)

expand(1)

export_vocabulary(1)

addWord(1)

isATerm(1)

getCF(1)

Esempio n. 1

Mostra file

    def postProcessing(clinicalNotes, nejiAnnotations, vocabularies):
        """
        Attach route, strength and dosage information to drug annotations.

        For every file of every dataset the annotations are sorted by their
        span, cleaned, disambiguated and filtered against the "all"
        vocabulary. Each distinct span that has a sentence is then checked for
        a route; only when a route is found is an entry recorded for the drug.

        :param clinicalNotes: Dict of clinical notes with the following
            structure (only the "cn" entry of each file is used)
            {
                "train":{
                    "file name":{
                        "cn": "clinical note",
                        "annotation":{
                            "id":("concept","type",[(span,span), ...])
                        },
                        "relation":{
                            "id": (annId1, ("concept","type",[(span,span), ...]))
                        }
                    }
                }
                "test":{...}
            }
        :param nejiAnnotations: Dict with the annotations; key is the dataset
            (train or test), value is another dict with the file names as keys
            and a list of annotations as values. The original description gives
            the annotation format as date|UMLS:C2740799:T129:DrugsBank|10;
            this code unpacks each annotation as a (concept, code, span)
            triple whose span converts with int().
            {
                "train":{
                    "file name":["annotation"]
                }
                "test":{...}
            }
        :param vocabularies: Vocabularies to be used in the post processing
            (passed unchanged to Vocabulary.readPostProcessingVoc).
        :return: Dict, by dataset and file, mapping (drug, annSpan) to a
            four-slot list that is filled through the ROUTE, STRENGTH and
            DOSAGE index constants; slots that are never assigned stay None.
            {
                "train":{
                    "file name":{
                        ("concept", annSpan): [...]
                    }
                }
                "test":{...}
            }
        """
        voc = Vocabulary.readPostProcessingVoc(vocabularies)
        annotations = {}
        for dataset, datasetAnnotations in nejiAnnotations.items():
            datasetResults = annotations[dataset] = {}
            for fileName, rawAnnotations in datasetAnnotations.items():
                # Every file gets an entry, even when nothing is annotated.
                fileResults = datasetResults[fileName] = {}
                clinicalNote = clinicalNotes[dataset][fileName]["cn"]

                annotation = sorted(rawAnnotations, key=lambda x: int(x[2]))
                annotation = Utils.cleanConceptBegin(annotation)
                disambiguatedAnn = Utils.disambiguate(annotation)
                filteredAnn = Annotator._filter(
                    disambiguatedAnn,
                    Utils.getVocListWithoutGroup(voc["all"]))
                if not filteredAnn:
                    continue

                sentences = Utils.getSentencesByAnnotation(
                    clinicalNote, filteredAnn)

                # A set gives constant-time "already handled" checks.
                seenSpans = set()
                for _concept, _code, annSpan in filteredAnn:
                    if annSpan in seenSpans:
                        continue
                    seenSpans.add(annSpan)

                    spanKey = int(annSpan)
                    if spanKey not in sentences:
                        continue

                    results = [None, None, None, None]
                    results[ROUTE] = Annotator._annotateRoute(
                        sentences[spanKey], voc["route-complex"],
                        voc["route"])
                    if results[ROUTE] is None:
                        # Without a route the annotation is not reported.
                        continue

                    # All named annotations sharing this span, taken from the
                    # cleaned (pre-disambiguation) list.
                    filterAnn = [
                        (concept, code, span)
                        for (concept, code, span) in annotation
                        if span == annSpan and concept is not None
                    ]
                    if len(filterAnn) > 1:
                        drug, strength = Utils.mergeAnnsToGetStrength(
                            filterAnn)
                        if drug:
                            results[STRENGTH] = strength
                    else:
                        drug = filterAnn[0][0]

                    sentence = ' '.join(sentences[spanKey])
                    if results[STRENGTH] is None:
                        # Fall back to extracting the strength from the text.
                        results[STRENGTH] = Annotator._annotateStrength(
                            sentence)
                    results[DOSAGE] = Annotator._annotateDosage(sentence)

                    fileResults[(drug, annSpan)] = results

        return annotations

Esempio n. 2

Mostra file

File: Annotator.py Progetto: masterwongjkd/DrAC

    def posProcessing(clinicalNotes, nejiAnnotations, vocabularies):
        """
        Attach route, strength, dosage and span information to annotations.

        For every file of every dataset the annotations are sorted by their
        span, the clinical note is split into sentences, and each distinct
        span is looked up in its sentence. Only when a route is found in that
        sentence is an entry recorded for the drug.

        :param clinicalNotes: Dict of clinical notes with the following
            structure (only the "cn" entry of each file is used)
            {
                "train":{
                    "file name":{
                        "cn": "clinical note",
                        "annotation":{
                            "id":("concept","type",[(span,span), ...])
                        },
                        "relation":{
                            "id": (annId1, ("concept","type",[(span,span), ...]))
                        }
                    }
                }
                "test":{...}
            }
        :param nejiAnnotations: Dict with the annotations; key is the dataset
            (train or test), value is another dict with the file names as keys
            and a list of annotations as values. The original description gives
            the annotation format as date|UMLS:C2740799:T129:DrugsBank|10;
            this code unpacks each annotation as a (concept, code, span)
            triple whose span converts with int().
            {
                "train":{
                    "file name":["annotation"]
                }
                "test":{...}
            }
        :param vocabularies: Vocabularies to be used in the post processing
            (passed unchanged to Vocabulary.readPostProcessingVoc).
        :return: Dict, by dataset and file, mapping the drug concept to a
            four-slot list that is filled through the ROUTE, STRENGHT, DOSAGE
            and SPAN index constants (SPAN holds [annSpan]). Entries are keyed
            by drug only, so a later span of the same drug in a file replaces
            the earlier entry.
            {
                "train":{
                    "file name":{
                        "concept": [...]
                    }
                }
                "test":{...}
            }
        """
        voc = Vocabulary.readPostProcessingVoc(vocabularies)
        annotations = {}
        for dataset, datasetAnnotations in nejiAnnotations.items():
            datasetResults = annotations[dataset] = {}
            for fileName, rawAnnotations in datasetAnnotations.items():
                # Every file gets an entry, even when nothing is annotated.
                fileResults = datasetResults[fileName] = {}
                clinicalNote = clinicalNotes[dataset][fileName]["cn"]
                annotation = sorted(rawAnnotations, key=lambda x: int(x[2]))

                # Newlines are swapped one-for-one with spaces, so the text
                # length and every character offset stay unchanged.
                clinicalNote = clinicalNote.replace('\n', ' ')
                clinicalNoteSentencesDict = nltkSentenceSplit(clinicalNote)

                # A set gives constant-time "already handled" checks.
                seenSpans = set()
                for _concept, _code, annSpan in annotation:
                    if annSpan in seenSpans:
                        continue
                    seenSpans.add(annSpan)

                    _initialSpan, sentence = Utils.getSentenceFromSentencesDict(
                        int(annSpan), clinicalNoteSentencesDict)

                    results = [None, None, None, None]
                    results[ROUTE] = Annotator._annotateRoute(
                        sentence, voc["route"])
                    if results[ROUTE] is None:
                        # Without a route the annotation is not reported.
                        continue

                    # All named annotations that share this span.
                    filterAnn = [
                        (concept, code, span)
                        for (concept, code, span) in annotation
                        if span == annSpan and concept is not None
                    ]
                    if len(filterAnn) > 1:
                        drug, dosage = Utils.mergeAnnsToGetStrength(filterAnn)
                        if drug:
                            results[STRENGHT] = dosage
                    else:
                        drug = filterAnn[0][0]

                    if results[STRENGHT] is None:
                        # Fall back to extracting the strength from the text.
                        results[STRENGHT] = Annotator._annotateStrenght(
                            drug, sentence, voc["strenght"])
                    results[DOSAGE] = Annotator._annotateDosage(
                        drug, sentence, voc["all"])
                    results[SPAN] = [annSpan]

                    fileResults[drug] = results
        return annotations