def do_semehr_index(settings, patients, doc_to_patient):
    """
    Run the SemEHR indexing steps that are switched on in the job settings.

    Two independent steps exist; each runs only when its ``job`` flag equals
    the string ``'yes'``:

    * ``semehr-concept``: every regular file in the yodie output directory is
      handed to ``utils.multi_thread_large_file_tasking`` together with
      ``do_index_100k_anns``.
    * ``semehr-patients``: ``patients`` is handed to
      ``utils.multi_thread_tasking`` together with ``do_index_100k_patients``.

    :param settings: configuration object queried via ``get_attr([section, key])``
    :param patients: passed as-is to ``utils.multi_thread_tasking``
    :param doc_to_patient: passed as-is to ``do_index_100k_anns``
    :return: None
    """
    def cfg(section, key):
        # Thin shorthand around the two-element key path lookups used below.
        return settings.get_attr([section, key])

    # Entity-centric ES client plus the index/doc-type names it works with.
    es = EntityCentricES(cfg('semehr', 'es_host'))
    for attr_name, setting_key in (
            ('index_name', 'index'),
            ('concept_doc_type', 'concept_doc_type'),
            ('entity_doc_type', 'entity_doc_type'),
            ('doc_level_index', 'doc_level_index')):
        setattr(es, attr_name, cfg('semehr', setting_key))

    # The annotation directory is listed unconditionally, even when the
    # concept step below is switched off.
    yodie_dir = cfg('yodie', 'output_file_path')
    ann_files = []
    for entry in listdir(yodie_dir):
        if isfile(join(yodie_dir, entry)):
            ann_files.append(entry)

    if cfg('job', 'semehr-concept') == 'yes':
        logging.info('[SemEHR-step] starting semehr-concept process')
        logging.debug('working on files : %s' % ann_files)
        # index concepts
        concept_index = cfg('semehr', 'concept_index')
        for ann_name in ann_files:
            ann_path = join(yodie_dir, ann_name)
            # A fresh args list is built for every file, as in the original.
            utils.multi_thread_large_file_tasking(
                ann_path, 10, do_index_100k_anns,
                args=[es, doc_to_patient, concept_index])
        logging.info('[SemEHR-step-end]concept/document level indexing done')

    if cfg('job', 'semehr-patients') == 'yes':
        logging.info('[SemEHR-step] indexing annotations at patient level')
        # index patients
        es_doc_url = cfg('semehr', 'es_doc_url')
        # NOTE(review): verify_certs=False turns off TLS certificate
        # verification in the Elasticsearch client - confirm this is intended.
        es_full_text = Elasticsearch([es_doc_url],
                                     serializer=JSONSerializerPython2(),
                                     verify_certs=False)
        worker_args = [es, es_full_text]
        for setting_key in ('full_text_index',
                            'full_text_doc_type',
                            'full_text_patient_field',
                            'full_text_text_field'):
            worker_args.append(cfg('semehr', setting_key))
        utils.multi_thread_tasking(patients, 10, do_index_100k_patients,
                                   args=worker_args)
        logging.info('[SemEHR-step-end]patient level indexing done')
# NOTE(review): another definition named do_semehr_index appears earlier in
# this file; the later definition is the one bound at import time - confirm
# which variant is meant to be active.
def do_semehr_index(settings, patients, doc_to_patient):
    """
    Run the SemEHR indexing steps that are enabled in the job settings.

    Each step runs only when its ``job`` flag equals the string ``'yes'``:

    * ``semehr-concept``: every regular file in the yodie output directory is
      passed to ``utils.multi_thread_large_file_tasking`` with
      ``do_index_100k_anns``.
    * ``semehr-patients``: ``patients`` is passed to
      ``utils.multi_thread_tasking`` with ``do_index_100k_patients``.

    :param settings: configuration object queried via ``get_attr([section, key])``
    :param patients: passed as-is to ``utils.multi_thread_tasking``
    :param doc_to_patient: passed as-is to ``do_index_100k_anns``
    :return: None
    """
    es = EntityCentricES(settings.get_attr(['semehr', 'es_host']))
    es.index_name = settings.get_attr(['semehr', 'index'])
    es.concept_doc_type = settings.get_attr(['semehr', 'concept_doc_type'])
    es.entity_doc_type = settings.get_attr(['semehr', 'entity_doc_type'])

    # Listed unconditionally, even when the concept step is switched off.
    f_yodie_anns = settings.get_attr(['yodie', 'output_file_path'])
    ann_files = [
        f for f in listdir(f_yodie_anns) if isfile(join(f_yodie_anns, f))
    ]

    if settings.get_attr(['job', 'semehr-concept']) == 'yes':
        # Single-argument parenthesised form: prints the same text under
        # Python 2 and is valid syntax under Python 3, unlike the bare
        # print statement.
        print('working on files : %s' % ann_files)
        # index concepts
        for ann in ann_files:
            utils.multi_thread_large_file_tasking(
                join(f_yodie_anns, ann), 10, do_index_100k_anns,
                args=[es, doc_to_patient])

    if settings.get_attr(['job', 'semehr-patients']) == 'yes':
        # index patients
        es_doc_url = settings.get_attr(['semehr', 'es_doc_url'])
        es_full_text = Elasticsearch([es_doc_url],
                                     serializer=JSONSerializerPython2())
        ft_index_name = settings.get_attr(['semehr', 'full_text_index'])
        ft_doc_type = settings.get_attr(['semehr', 'full_text_doc_type'])
        ft_entity_field = settings.get_attr(
            ['semehr', 'full_text_patient_field'])
        ft_fulltext_field = settings.get_attr(
            ['semehr', 'full_text_text_field'])
        utils.multi_thread_tasking(patients, 10, do_index_100k_patients,
                                   args=[
                                       es, es_full_text, ft_index_name,
                                       ft_doc_type, ft_entity_field,
                                       ft_fulltext_field
                                   ])