예제 #1
0
import xml.dom.minidom as Dom
import sys
import os
import shutil
import numpy as np
from keras.preprocessing import text
import tensorflow as tf
import random

from rnn_atten import TRNNConfig, TextRNN
from data.cnews_loader import read_vocab, read_category, batch_iter, process_file, clean_wds, get_dic
from data.cnews_loader import build_vocab, build_vocab_words, loadWord2Vec, expand_abbr, txt_proc
from data.cnews_loader import read_category_textual, read_category_intuitive
import run_rnn as rnn

# Ground-truth annotation dictionaries for the obesity challenge data.
# NOTE(review): get_dic presumably parses an annotation XML into a nested
# dict — it is defined in data.cnews_loader, not visible here; confirm shape.
train_dic = get_dic('data/Obesity_data/train_groundtruth.xml')
test_dic = get_dic('data/Obesity_data/test_groundtruth.xml')

# Outputs of the rule-based (perl) classifier for the two judgment tracks.
test_dic_text_rule = get_dic(
    'perl_classifier/output/system_textual_annotation.xml')
test_dic_int_rule = get_dic(
    'perl_classifier/output/system_intuitive_annotation.xml')

# Read Word Vectors
# loadWord2Vec returns the vocabulary, the embedding rows, and a
# word -> vector mapping; the embedding dimension is taken from row 0.
word_vector_file = 'data/mimic3_pp100.txt'
vocab, embd, word_vector_map = loadWord2Vec(word_vector_file)
embedding_dim = len(embd[0])
#embeddings = np.asarray(embd)
# Install the category mappings on the run_rnn module (imported as `rnn`)
# so its functions see the same label <-> id tables as this script.
rnn.categories, rnn.cat_to_id, rnn.id_to_cat = read_category()

# Fresh XML document used to accumulate results below.
doc = Dom.Document()
예제 #2
0
from data.cnews_loader import get_dic

def _read_lines(path):
    """Return the contents of *path* split on newlines.

    Replaces the repeated open/read/close pattern; the context manager
    guarantees the file handle is closed even if reading raises.
    """
    with open(path, 'r') as log_file:
        return log_file.read().split('\n')


# Rule-based (perl classifier) annotation outputs.
# NOTE(review): get_dic presumably parses an annotation XML into a dict —
# defined in data.cnews_loader, not visible in this chunk.
test_dic = get_dic('perl_classifier/output/perl_annotation6.xml')

test_dic_text_rule = get_dic('perl_classifier/output/prod_134_0_2.xml')
test_dic_int_rule = get_dic('perl_classifier/output/prod_134_0_5.xml')

# positive keyword logs (textual / intuitive judgment tracks)
tplines = _read_lines('perl_classifier/textual/positive_useful.log')
iplines = _read_lines('perl_classifier/intuitive/positive_useful.log')

# questionable keyword logs
tqlines = _read_lines('perl_classifier/textual/questionable_truly_useful.log')
iqlines = _read_lines('perl_classifier/intuitive/questionable_truly_useful.log')

# negated
# NOTE(review): this read pattern is truncated here — the matching
# read()/close()/split('\n') continuation is outside this chunk.
f = open('perl_classifier/textual/negated_truly_useful.log', 'r')
예제 #3
0
# Parse keyword log lines shaped like "<c><doc_id> <disease>|<keyword>",
# where the first character of each line is skipped.
# NOTE(review): `lines`, `useful_line_list` and `keyword_set` are defined in
# an earlier part of this script, not visible in this chunk.
for line in lines:
    # Hoist the delimiter searches — the original recomputed each find()
    # twice per line; same results, half the scans.
    space_pos = line.find(' ')
    pipe_pos = line.find('|')
    doc_id = line[1:space_pos].strip()
    disease = line[space_pos + 1:pipe_pos].strip()
    keyword = line[pipe_pos + 1:].strip()
    string = doc_id + ',' + disease + ',' + keyword
    print(string)
    useful_line_list.append(string)
    keyword_set.add(keyword)

# Distinct keywords become the feature vocabulary; its size is the
# feature-vector length used downstream.
keyword_list = list(keyword_set)
feature_length = len(keyword_list)
print(feature_length)
print(keyword_list)

# Ground-truth annotation dictionaries for the obesity challenge data.
# NOTE(review): get_dic presumably returns a nested dict keyed by
# source -> disease — defined in data.cnews_loader, not visible here.
train_dic = get_dic('data/Obesity_data/train_groundtruth.xml')
test_dic = get_dic('data/Obesity_data/test_groundtruth.xml')

# Build an XML result document rooted at <diseaseset>.
doc = Dom.Document()
root_node = doc.createElement("diseaseset")
doc.appendChild(root_node)

# One <diseases source="..."> element per top-level key; every test key is
# expected to exist in train_dic as well (KeyError otherwise).
for key in train_dic:
    train_sub_dic = train_dic[key]
    test_sub_dic = test_dic[key]
    source_node = doc.createElement("diseases")
    source_node.setAttribute("source", key)
    # One <disease name="..."> element per disease under this source.
    for sub_key in train_sub_dic:
        disease_node = doc.createElement("disease")
        disease_node.setAttribute("name", sub_key)
        # NOTE(review): loop body is truncated here — the rest of the
        # per-disease processing is outside this chunk.
        train_data_X = []
예제 #4
0
from data.cnews_loader import build_vocab, build_vocab_words, loadWord2Vec, expand_abbr, txt_proc, if_has_YNQ
import run_cnn_combined as cnn
#from rnn_atten import TRNNConfig, TextRNN
#import run_rnn as rnn

# One CUI document per line; `with` guarantees the handle is closed even
# if reading raises (the original used open/read/close with no guard).
with open('data/CUIs_text_weng.txt', 'r') as cui_file:
    lines = cui_file.read().split('\n')

# The manual append loop was an element-for-element copy — list() is the
# idiomatic equivalent. `lines` is kept bound for any later reuse.
corpus = list(lines)
print(len(corpus))

# Ground-truth annotation dictionaries for the obesity challenge data.
# NOTE(review): get_dic presumably parses an annotation XML into a dict —
# defined in data.cnews_loader, not visible in this chunk.
train_dic = get_dic('data/Obesity_data/train_groundtruth.xml')
test_dic = get_dic('data/Obesity_data/test_groundtruth.xml')

# Rule-based classifier outputs for the textual / intuitive tracks.
test_dic_text_rule = get_dic('perl_classifier/output/prod_134_0_2.xml')
test_dic_int_rule = get_dic('perl_classifier/output/prod_134_0_5.xml')

# Read CUI Vectors
# loadWord2Vec returns (vocab, embedding rows, token -> vector map);
# the embedding dimension is taken from the first row.
entity_vector_file = 'data/DeVine_etal_200.txt'
entity_vocab, entity_embd, entity_vector_map = loadWord2Vec(entity_vector_file)
entity_embedding_dim = len(entity_embd[0])

# Read Word Vectors
word_vector_file = 'data/mimic3_pp200.txt'
word_vocab, word_embd, word_vector_map = loadWord2Vec(word_vector_file)
word_embedding_dim = len(word_embd[0])