import xml.dom.minidom as Dom
import sys
import os
import shutil
import numpy as np
from keras.preprocessing import text
import tensorflow as tf
import random
from rnn_atten import TRNNConfig, TextRNN
from data.cnews_loader import read_vocab, read_category, batch_iter, process_file, clean_wds, get_dic
from data.cnews_loader import build_vocab, build_vocab_words, loadWord2Vec, expand_abbr, txt_proc
from data.cnews_loader import read_category_textual, read_category_intuitive
import run_rnn as rnn

# Ground-truth annotations and rule-based system output
train_dic = get_dic('data/Obesity_data/train_groundtruth.xml')
test_dic = get_dic('data/Obesity_data/test_groundtruth.xml')
test_dic_text_rule = get_dic(
    'perl_classifier/output/system_textual_annotation.xml')
test_dic_int_rule = get_dic(
    'perl_classifier/output/system_intuitive_annotation.xml')

# Read Word Vectors
word_vector_file = 'data/mimic3_pp100.txt'
vocab, embd, word_vector_map = loadWord2Vec(word_vector_file)
embedding_dim = len(embd[0])
#embeddings = np.asarray(embd)

rnn.categories, rnn.cat_to_id, rnn.id_to_cat = read_category()

doc = Dom.Document()
from data.cnews_loader import get_dic

test_dic = get_dic('perl_classifier/output/perl_annotation6.xml')
test_dic_text_rule = get_dic('perl_classifier/output/prod_134_0_2.xml')
test_dic_int_rule = get_dic('perl_classifier/output/prod_134_0_5.xml')

# Keyword logs produced by the Perl rule-based classifier (textual / intuitive tasks)

# positive
f = open('perl_classifier/textual/positive_useful.log', 'r')
content = f.read()
f.close()
tplines = content.split('\n')

f = open('perl_classifier/intuitive/positive_useful.log', 'r')
content = f.read()
f.close()
iplines = content.split('\n')

# questionable
f = open('perl_classifier/textual/questionable_truly_useful.log', 'r')
content = f.read()
f.close()
tqlines = content.split('\n')

f = open('perl_classifier/intuitive/questionable_truly_useful.log', 'r')
content = f.read()
f.close()
iqlines = content.split('\n')

# negated
f = open('perl_classifier/textual/negated_truly_useful.log', 'r')
# Parse each log line into doc_id, disease and keyword, and collect the set of keywords
for line in lines:
    doc_id = line[1:line.find(' ')].strip()
    disease = line[line.find(' ') + 1:line.find('|')].strip()
    keyword = line[line.find('|') + 1:].strip()
    string = doc_id + ',' + disease + ',' + keyword
    print(string)
    useful_line_list.append(string)
    keyword_set.add(keyword)

keyword_list = list(keyword_set)
feature_length = len(keyword_list)
print(feature_length)
print(keyword_list)

train_dic = get_dic('data/Obesity_data/train_groundtruth.xml')
test_dic = get_dic('data/Obesity_data/test_groundtruth.xml')

# Build the output XML skeleton: one <diseases> node per source, one <disease> node per disease
doc = Dom.Document()
root_node = doc.createElement("diseaseset")
doc.appendChild(root_node)

for key in train_dic:
    train_sub_dic = train_dic[key]
    test_sub_dic = test_dic[key]
    source_node = doc.createElement("diseases")
    source_node.setAttribute("source", key)
    for sub_key in train_sub_dic:
        disease_node = doc.createElement("disease")
        disease_node.setAttribute("name", sub_key)
        train_data_X = []
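# --- Hedged sketch (not part of the original script): one plausible way the
# keyword list above could be turned into binary presence features per
# (doc_id, disease) pair. The helper name 'keyword_features' and the use of
# the 'doc_id,disease,keyword' strings in useful_line_list are assumptions
# based on the parsing code above.
def keyword_features(useful_line_list, keyword_list):
    features = {}  # (doc_id, disease) -> list of 0/1 flags, one per keyword
    for entry in useful_line_list:
        doc_id, disease, keyword = entry.split(',', 2)
        vec = features.setdefault((doc_id, disease), [0] * len(keyword_list))
        vec[keyword_list.index(keyword)] = 1
    return features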
from data.cnews_loader import build_vocab, build_vocab_words, loadWord2Vec, expand_abbr, txt_proc, if_has_YNQ
from data.cnews_loader import get_dic
import run_cnn_combined as cnn
#from rnn_atten import TRNNConfig, TextRNN
#import run_rnn as rnn

f = open('data/CUIs_text_weng.txt', 'r')
content = f.read()
lines = content.split('\n')
f.close()

corpus = []
for line in lines:
    corpus.append(line)
print(len(corpus))

train_dic = get_dic('data/Obesity_data/train_groundtruth.xml')
test_dic = get_dic('data/Obesity_data/test_groundtruth.xml')
test_dic_text_rule = get_dic('perl_classifier/output/prod_134_0_2.xml')
test_dic_int_rule = get_dic('perl_classifier/output/prod_134_0_5.xml')

# Read CUI Vectors
entity_vector_file = 'data/DeVine_etal_200.txt'
entity_vocab, entity_embd, entity_vector_map = loadWord2Vec(entity_vector_file)
entity_embedding_dim = len(entity_embd[0])
#embeddings = np.asarray(embd)

# Read Word Vectors
word_vector_file = 'data/mimic3_pp200.txt'
word_vocab, word_embd, word_vector_map = loadWord2Vec(word_vector_file)
word_embedding_dim = len(word_embd[0])
#embeddings = np.asarray(embd)
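# --- Hedged sketch (not part of the original script): the commented-out
# 'embeddings = np.asarray(embd)' lines suggest the loaded CUI and word
# vectors are stacked into dense matrices, e.g. to initialize the embedding
# layers of the combined CNN. The variable names below are illustrative.
import numpy as np

entity_embeddings = np.asarray(entity_embd, dtype=np.float32)  # shape: (len(entity_vocab), entity_embedding_dim)
word_embeddings = np.asarray(word_embd, dtype=np.float32)      # shape: (len(word_vocab), word_embedding_dim)
assert entity_embeddings.shape[1] == entity_embedding_dim
assert word_embeddings.shape[1] == word_embedding_dim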