def main():
    """Generate data/ontology/symptom.ttl from data/raw/symblos.xlsx.

    For every row of the sheet this emits an optional ``rdfs:comment``
    triple (only when column 9 holds a unicode string), an ``rdfs:label``
    triple built from column 3, and the strings returned by
    ``build_disease``, ``build_department``, ``build_symptom`` and
    ``build_part``.  Everything is written after the header returned by
    ``all()``, with the collected entries separated by blank lines.
    """
    df = pds.read_excel('data/raw/symblos.xlsx', encoding="utf-8")
    symptom_dict, _words = get_symptom()
    total = len(df)

    # The context manager guarantees the file is closed (and flushed) even
    # when a row raises part-way through the sheet.
    with open('data/ontology/symptom.ttl', 'w') as fl:
        print(total)
        # NOTE(review): unlike the other writers, no blank line is written
        # between the header and the first triple -- confirm all() ends
        # with a newline.
        fl.write(all())

        line_s = []
        for index, symptom in df.iterrows():
            symptom_uri = symptom_dict.get(symptom[3])

            # Cells without text are not unicode (e.g. NaN), so they get
            # no comment triple.
            if isinstance(symptom[9], unicode):
                comment = remove_all_html(symptom[9]).encode('utf-8')
                line_s.append('symptom:Q%s rdfs:comment "%s"@cn .'
                              % (symptom_uri, comment))

            line_s.append('symptom:Q%s rdfs:label "%s"@cn .'
                          % (symptom_uri, symptom[3].encode('utf-8')))
            line_s.append(build_disease(symptom, symptom_uri))
            line_s.append(build_department(symptom, symptom_uri))
            line_s.append(build_symptom(symptom, symptom_uri))
            line_s.append(build_part(symptom, symptom_uri))
            # NOTE(review): the result of build_food is discarded here;
            # confirm whether it should be appended like the others.
            build_food(symptom, symptom_uri)

            print('finished: %d: %d' % (index, total))

        fl.write('\n\n'.join(line_s))
def main():
    """Generate data/ontology/disease.ttl from data/raw/disease.xlsx.

    After the header returned by ``all()``, each row contributes an
    optional ``rdfs:comment`` triple (only when column 5 holds a unicode
    string), an ``rdfs:label`` triple built from column 3, and the strings
    returned by ``build_department``, ``build_symptom``, ``build_check``,
    ``build_disease``, ``build_medicine`` and ``build_other``.  Every
    piece is followed by a blank line.
    """
    df = pds.read_excel('data/raw/disease.xlsx', encoding='utf-8')
    disease_dict, _words = get_disease()
    total = len(df)
    cols = df.columns

    # The context manager closes the file even if a row fails, so partial
    # output is flushed instead of being left to the garbage collector.
    with open('data/ontology/disease.ttl', 'w') as fl:

        def emit(text):
            # Write one chunk followed by the blank-line separator.
            fl.write(text)
            fl.write('\n\n')

        emit(all())

        for index, row in df.iterrows():
            disease_uri = disease_dict.get(row[3])

            # Cells without text are not unicode (e.g. NaN): no comment.
            if isinstance(row[5], unicode):
                emit('disease:Q%s rdfs:comment "%s"@cn .'
                     % (disease_uri, remove_all_html(row[5]).encode('utf-8')))

            emit('disease:Q%s rdfs:label "%s"@cn .'
                 % (disease_uri, row[3].encode('utf-8')))

            emit(build_department(row, disease_uri))
            # NOTE(review): build_symptom, build_medicine and build_other
            # receive the row index while the other builders receive
            # disease_uri -- confirm this asymmetry is intended.
            emit(build_symptom(row, index))
            emit(build_check(row, disease_uri))
            emit(build_disease(row, disease_uri))
            emit(build_medicine(row, index))
            emit(build_other(row, index, cols).encode('utf-8'))

            print('finised:%d of %d' % (index, total))
def main():
    """Write one ``check:Q<n> rdfs:label`` triple per check word.

    The words come from ``get_check()``; each is numbered by its position
    in that sequence.  Output goes to data/ontology/check.ttl after the
    header returned by ``all()``.
    """
    _word_dict, words = get_check()
    lines = ['check:Q%d rdfs:label "%s"@cn .' % (index, word)
             for index, word in enumerate(words)]

    # The original never closed this file; the context manager makes sure
    # the data is flushed and the handle released.
    with open('data/ontology/check.ttl', 'w') as fl:
        fl.write(all())
        fl.write('\n\n')
        fl.write('\n\n'.join(lines).encode('utf-8'))
def main():
    """Write the ``prom:P<n>`` property labels to data/ontology/pro.ttl.

    Each row of word/properties/medicine.csv yields one ``rdfs:label``
    triple, numbered by the row's DataFrame index and labelled with its
    ``name`` column.  The triples follow the header returned by ``all()``.
    """
    df = pds.read_csv('word/properties/medicine.csv')

    # The context manager closes the file even if formatting a row fails.
    with open('data/ontology/pro.ttl', 'w') as pro:
        pro.write(all())
        pro.write('\n\n\n')
        # writelines lets the buffered layer batch the per-row writes.
        pro.writelines(
            'prom:P%d rdfs:label "%s"@cn .\n' % (index, row['name'])
            for index, row in df.iterrows()
        )
def main():
    """Write the two department label files.

    ``get_department()`` returns two sequences; the entries of each are
    numbered by position and written as ``rdfs:label`` triples to
    data/ontology/department_1.ttl and data/ontology/department_2.ttl
    respectively, each after the header returned by ``all()``.
    """
    department_1, department_2 = get_department()

    line_1_s = ['department_1:Q%d rdfs:label "%s"@cn .' % (index, name)
                for index, name in enumerate(department_1)]
    line_2_s = ['department_2:Q%d rdfs:label "%s"@cn .' % (index, name)
                for index, name in enumerate(department_2)]

    # One with-statement for both handles so neither leaks if a write or
    # an encode fails.
    with open('data/ontology/department_1.ttl', 'w') as fl_1, \
            open('data/ontology/department_2.ttl', 'w') as fl_2:
        fl_1.write(all())
        fl_2.write(all())
        fl_1.write('\n\n')
        fl_2.write('\n\n')
        fl_1.write('\n'.join(line_1_s).encode('utf-8'))
        fl_2.write('\n'.join(line_2_s).encode('utf-8'))
def main():
    """Write component descriptions to data/ontology/component_desc.ttl.

    Each row of data/word/components_word_new.csv is looked up by its
    first column in the dictionary returned by ``get_component()``; rows
    with a known id produce an ``ele:Q<id> rdfs:comment`` triple whose
    text is the second column passed through ``remove_all_html``.  The
    triples follow the header returned by ``all()``.
    """
    word_dict, _words = get_component()
    df = pds.read_csv('data/word/components_word_new.csv', encoding='utf-8')

    lines = []
    for _index, row in df.iterrows():
        word_id = word_dict.get(row[0])
        # Compare against None rather than relying on truthiness so that a
        # component whose id is 0 is not silently dropped.
        if word_id is not None:
            lines.append('ele:Q%d rdfs:comment "%s"@cn .'
                         % (word_id, remove_all_html(row[1])))

    # The original never closed this file; the context manager makes sure
    # the data is flushed and the handle released.
    with open('data/ontology/component_desc.ttl', 'w') as fl:
        fl.write(all())
        fl.write('\n\n')
        fl.write('\n\n'.join(lines).encode('utf-8'))
def build_product():
    """Write one ``drug:Q<n> rdfs:label`` triple per medicine product.

    The product names come from ``get_medicine_product()`` and are
    numbered by position.  Each name and index is also echoed to stdout.
    Output goes to data/ontology/medicine_product.ttl after the header
    returned by ``all()``.
    """
    _medicine_dict, words = get_medicine_product()

    line_s = []
    for index, row in enumerate(words):
        print(row)
        print(index)
        print('------>')
        line_s.append('drug:Q%d rdfs:label "%s"@cn .' % (index, row))

    # Open the output only once the lines are ready and let the context
    # manager close it, so a failure cannot leak the handle.
    with open('data/ontology/medicine_product.ttl', 'w') as fl:
        # NOTE(review): no blank line is written between the header and
        # the first triple -- confirm all() ends with a newline.
        fl.write(all())
        fl.write('\n\n'.join(line_s).encode('utf-8'))
def main():
    """Generate data/triple/medicine.ttl from data/raw/medicine.xlsx.

    Column names of the sheet are mapped to property ids ``P<n>`` using
    the position of the name in word/properties/medicine.csv.  For every
    medicine row the drug id is resolved with ``get_by_keyword`` on
    column 2, and each column is turned into zero or more triples:

    * the medical-insurance flag and the molecular weight are handled
      first, whatever the cell type;
    * the remaining columns are only processed when the column has a
      property id and the cell is a non-empty unicode string, with
      dedicated handling for manufacturer, ingredients, indications,
      dosage form, drug name, contraindications, molecular formula and
      adverse reactions, and a plain literal triple for everything else.

    The output file is opened only after all triples have been built, so
    a failure part-way through leaves any existing file untouched.
    """
    # The result was never used by the original either; the call is kept
    # in case it has side effects.  NOTE(review): drop if it is pure.
    get_medicine()
    df = pds.read_csv('word/properties/medicine.csv', encoding="utf-8")
    m_words = get_words()
    m_dict = element(m_words)
    medicine_x = pds.read_excel('data/raw/medicine.xlsx', encoding="utf-8")

    # Column name -> property id, numbered by position in the csv.
    dict_name = {name: 'P%d' % index for index, name in enumerate(df['name'])}

    man_dict = manufacturer()
    dosage_dict = dosage_form()
    formula_dict = get_formula()
    generic_dict = get_generic_medicine()

    line_s = []
    total = len(medicine_x)
    for index, medicine in medicine_x.iterrows():
        print(medicine[2])
        medicine_uri = get_by_keyword(medicine[2])
        print(medicine_uri)
        print('===============')
        # NOTE(review): raises AttributeError if get_by_keyword finds
        # nothing and returns None -- confirm that cannot happen.
        medicine_uri = int(medicine_uri.id)

        for col in medicine_x.columns:
            pid = dict_name.get(col)
            prop = medicine.get(col)

            # Medical-insurance flag.
            if col == u'是否医保' and prop == 1:
                line_s.append('drug:Q%d prom:%s %s . \n'
                              % (medicine_uri, 'P30', 'medicine:Q1'))
                continue

            # Molecular weight: a real, non-NaN float.
            if (pid and col == u'分子量' and prop
                    and isinstance(prop, float) and not math.isnan(prop)):
                line_s.append('drug:Q%d prom:%s %s . \n'
                              % (medicine_uri, 'P18', prop))
                continue

            # Everything below needs a known property and a text cell.
            if not (pid and prop and isinstance(prop, unicode)):
                continue

            if col == u'生产企业':
                # Manufacturer.
                line_s.append('drug:Q%d prom:%s org:%s . \n'
                              % (medicine_uri, pid, man_dict.get(prop)))
            elif col == u'主要成份':
                # Main ingredients: one triple per extracted element.
                ele_str = remove_html(prop)
                for el in extrat(m_words, ele_str):
                    # The original literal was broken across two physical
                    # lines here; restored to match the sibling triples.
                    line_s.append('drug:Q%d prom:%s ele:Q%s . \n'
                                  % (medicine_uri, pid, m_dict.get(el)))
            elif col == u'适应症':
                # Indications.
                line_s.append(indications(prop, medicine_uri, 'P8'))
                line_s.append('\n')
                line_s.append(indications_sym(prop, medicine_uri, 'P8'))
                line_s.append('\n')
            elif col == u'剂型':
                # Dosage form.
                form_str = remove_special_character(prop)
                # NOTE(review): %d raises TypeError when the form is not
                # in dosage_dict -- confirm the dictionary is complete.
                line_s.append('drug:Q%d prom:P31 dosage_form:Q%d .'
                              % (medicine_uri, dosage_dict.get(form_str)))
                line_s.append('\n')
            elif col == u'药品名称':
                # Drug (generic) name.
                # NOTE(review): same %d / missing-key caveat as above.
                line_s.append('drug:Q%d prom:P1 drug_generic:Q%d .'
                              % (medicine_uri, generic_dict.get(prop)))
                line_s.append('\n')
            elif col == u'禁忌':
                # Contraindications.
                line_s.append(indications(prop, medicine_uri, 'P11'))
                line_s.append('\n')
                line_s.append(indications_sym(prop, medicine_uri, 'P11'))
                line_s.append('\n')
            elif col == u'分子式':
                # Molecular formula: only when it maps to a known id.
                formula_str = format_formula(prop)
                if formula_str and formula_dict.get(formula_str):
                    line_s.append(
                        'drug:Q%d prom:%s formula:Q%s . \n'
                        % (medicine_uri, 'P17', formula_dict.get(formula_str)))
                line_s.append('\n')
            elif col == u'不良反应':
                # Adverse reactions.
                line_s.append(indications_sym(prop, medicine_uri, 'P10'))
                line_s.append('\n')
            else:
                # Any other text property becomes a plain literal.
                prop = remove_html(prop)
                line_s.append('drug:Q%d prom:%s "%s"@cn . \n'
                              % (medicine_uri, pid,
                                 remove_all_html(prop).encode('utf-8')))
                line_s.append('\n')

        # Blank lines between consecutive medicines.
        line_s.append('\n\n\n')
        print('<---------:%d of %d' % (index, total))

    # The context manager closes the file even if a write fails.
    with open('data/triple/medicine.ttl', 'w') as medicine_o:
        medicine_o.write(all())
        medicine_o.write('\n\n\n')
        medicine_o.write(''.join(line_s))