Code Example #1
def delete_file(filename):
    """Delete a dataset's derived .data file (and its shared source file, if present), then update the status index."""
    global api
    try:
        dataset_folder = os.path.join(BASE_DIR, DATASET_FOLDER)
        dataset_path = os.path.join(dataset_folder, filename).replace('.jsonl', '.data').replace('.json', '.data')
        # Source .json/.jsonl files uploaded to the shared folder are removed as well.
        if '.json' in filename and filename in api.shared_folder_manager.get_file_names_in_folder():
            remove_file(os.path.join(BASE_DIR, SHARE_FOLDER, filename))
        remove_file(dataset_path)
        api.dataset_status.pop(filename)
        dataset_status_file_path = os.path.join(dataset_folder, DATASET_STATUS_FILE)
        write_json(api.dataset_status, dataset_status_file_path)
        api.selected_dataset = None
        return make_response(jsonify({'message': '{0} deleted'.format(filename)}), 200)
    except Exception as e:
        # Any failure (missing file, missing status entry, write error) is reported as 400.
        return make_response(jsonify({'message': '{0}'.format(e)}), 400)
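These handlers lean on a pair of small helpers, remove_file and write_json, and repeat the same source-to-.data path mapping throughout. A minimal sketch of what they might look like; the helper bodies and the dataset_path_for name are assumptions for illustration, not code from this project:

import json
import os

def remove_file(path):
    # Hypothetical: delete a file, tolerating one that is already gone.
    if os.path.exists(path):
        os.remove(path)

def write_json(data, path):
    # Hypothetical: serialize to JSON, keeping non-ASCII text readable.
    with open(path, 'w', encoding='utf-8') as fp:
        json.dump(data, fp, ensure_ascii=False, indent=2)

def dataset_path_for(filename, dataset_folder):
    # Hypothetical: factor out the repeated .jsonl/.json -> .data mapping.
    return os.path.join(dataset_folder, filename).replace('.jsonl', '.data').replace('.json', '.data')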
Code Example #2
def build_med_terminology(terminology_file_path, entity_name=None, save=False):
    """Read a terminology file line by line and index its SY/PT entries into the lexicon trie."""
    global added_terminology, split_characters, global_specialist_lexicon_parser
    if entity_name is None:
        entity_name = get_basename(terminology_file_path).replace('.txt', '')
    with open(terminology_file_path, 'r', encoding='utf-8', errors='replace') as fp:
        for line in fp:
            code, attr, desc, generic_code, generic_terminology, terminology_entry_type = \
                normalize_line_of_terminology(line)
            # Only synonyms (SY) and preferred terms (PT) are indexed.
            if attr in ['SY', 'PT']:
                tags = {
                    'cat': 'noun',
                    't2': {
                        'code': code,
                        'entity': entity_name
                    }
                }
                terminologies = normalize_and_expand_to_build_terminology(desc, terminology_entry_type, code,
                                                                          entity_name)
                for terminology in terminologies:
                    if (code, terminology, terminology_entry_type, entity_name) not in added_terminology:
                        global_specialist_lexicon_parser.build_trie(terminology, tags)
                        added_terminology.add((code, terminology, terminology_entry_type, entity_name))
                if generic_code and generic_terminology:
                    terminology = generic_terminology.strip()
                    if (generic_code, terminology, terminology_entry_type, entity_name) not in added_terminology:
                        # Copy before mutating: build_trie may keep a reference to the tags
                        # dict, so writing 't2_code' into the shared dict could also alter
                        # the tags already stored for the non-generic entries above.
                        generic_tags = dict(tags)
                        generic_tags['t2_code'] = generic_code
                        global_specialist_lexicon_parser.build_trie(terminology, generic_tags)
                        added_terminology.add((generic_code, terminology, terminology_entry_type, entity_name))
    if save:
        save_specialist_lexicon_parser()
    # Persist the de-duplication log so reruns can skip already-indexed entries.
    write_json(list(added_terminology), '{0}_added.json'.format(entity_name))
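A possible invocation, with a made-up file path and entity label; entity_name defaults to the file's base name, and save=True persists the trie via save_specialist_lexicon_parser:

# Index an ICD-10 terminology file (hypothetical path) and persist the parser state.
build_med_terminology('terminologies/icd10.txt', entity_name='icd10', save=True)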
Code Example #3
def clean_orphan_dataset():
    """Drop status entries whose backing .data file no longer exists."""
    global api
    orphan_dataset = []
    for ds in api.dataset_status.keys():
        # 'updated' is a timestamp sentinel, not a dataset entry.
        if ds == 'updated':
            continue
        dataset_path = os.path.join(DATASET_DIR, ds.replace('.jsonl', '.data').replace('.json', '.data'))
        if not os.path.exists(dataset_path):
            orphan_dataset.append(ds)
    if orphan_dataset:
        for ds in orphan_dataset:
            api.dataset_status.pop(ds)
        dataset_status_file = os.path.join(DATASET_DIR, DATASET_STATUS_FILE)
        write_json(api.dataset_status, dataset_status_file)
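For context, dataset_status mixes per-file counter entries with a single 'updated' timestamp sentinel, which is why clean_orphan_dataset skips that key. A sketch of the shape implied by Code Examples #4-#6; the field names come from the code, the sample values are invented:

dataset_status = {
    'reports.jsonl': {
        'not_started': 120,        # contexts not yet inferred
        'processing_dataset': 3,   # inferred, awaiting accept/reject/skip
        'skipped_dataset': 10,
        'rejected_dataset': 7,
    },
    'updated': '2024-01-01 12:00:00',  # last write, formatted with DATETIME_FORMAT
}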
Code Example #4
def api_skip():
    """Accept infer results of current dataset"""
    global api
    if request.method == 'POST':
        context, entity_type, extracted_code, original_highlighted, inprogress = get_next_dataset_context()
        api.dataset[context]['skipped'] = True
        dataset_file_path = os.path.join(DATASET_DIR,
                                         api.selected_dataset.replace('.jsonl', '.data').replace('.json', '.data'))
        write_json(api.dataset, dataset_file_path)
        api.dataset_status[api.selected_dataset]['processing_dataset'] -= 1
        api.dataset_status[api.selected_dataset]['skipped_dataset'] += 1
        api.dataset_status['updated'] = datetime.now().strftime(DATETIME_FORMAT)
        dataset_status_file_path = os.path.join(DATASET_DIR, DATASET_STATUS_FILE)
        write_json(api.dataset_status, dataset_status_file_path)

        return infer_next_code()
Code Example #5
def api_reject_and_learn_code():
    """Accept infer results of current dataset"""
    global api
    if request.method == 'POST':
        new_code = request.json['new_code']
        new_code_terminology = request.json['new_code_terminology']
        highlighted = request.json['highlighted']
        context, entity_type, extracted_code, original_highlighted, inprogress = get_next_dataset_context()
        api.dataset[context]['rejected'] = {
            'selected': 'new_learn',
            'new_code_terminology': new_code_terminology,
            'highlighted': highlighted,
            'code': new_code,
        }
        dataset_file_path = os.path.join(DATASET_DIR,
                                         api.selected_dataset.replace('.jsonl', '.data').replace('.json', '.data'))
        write_json(api.dataset, dataset_file_path)
        api.dataset_status[api.selected_dataset]['processing_dataset'] -= 1
        api.dataset_status[api.selected_dataset]['rejected_dataset'] += 1
        api.dataset_status['updated'] = datetime.now().strftime(DATETIME_FORMAT)
        dataset_status_file_path = os.path.join(DATASET_DIR, DATASET_STATUS_FILE)
        write_json(api.dataset_status, dataset_status_file_path)

        return infer_next_code()
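api_skip and api_reject_and_learn_code share the same persistence and counter bookkeeping. A hypothetical refactor that factors it out; the helper name is an assumption, and it relies on the same module globals (DATASET_DIR, DATASET_STATUS_FILE, DATETIME_FORMAT, write_json) the originals already use:

import os
from datetime import datetime

def transition_dataset_status(api, counter_from, counter_to):
    # Hypothetical helper: persist the current dataset, move one context between
    # status counters, and stamp the update time.
    dataset_file_path = os.path.join(DATASET_DIR,
                                     api.selected_dataset.replace('.jsonl', '.data').replace('.json', '.data'))
    write_json(api.dataset, dataset_file_path)
    status = api.dataset_status[api.selected_dataset]
    status[counter_from] -= 1
    status[counter_to] += 1
    api.dataset_status['updated'] = datetime.now().strftime(DATETIME_FORMAT)
    write_json(api.dataset_status, os.path.join(DATASET_DIR, DATASET_STATUS_FILE))

api_skip would then reduce to flagging the context and calling transition_dataset_status(api, 'processing_dataset', 'skipped_dataset'); api_reject_and_learn_code would do the same with 'rejected_dataset'.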
Code Example #6
def infer_next_code():
    """find code from unprocessed dataset"""
    global api
    find_code_results = []
    context, entity_type, extracted_code, original_highlighted, inprogress = get_next_dataset_context()
    if context is None:
        response = {
            "message": "All contexts were processed.",
            "method": "POST",
            "results": [],
            "status-code": 200
        }
        return make_response(jsonify(response), response.get("status-code", 400))
    # Split the context into candidate lines: unescape '\n', collapse blank lines, and
    # break on ';' and ':' so each clause is scored separately.
    processed_context_lines = (
        context.lower().replace('\\n', '\n').replace('\n\n', '\n')
        .replace(';', '\n').replace(':', '\n').replace('.', ' . ').split('\n')
    )
    payloads = generate_payload_by_line(processed_context_lines, entity_type=entity_type)
    # Disabled alternative: when original_highlighted is set, build payloads from the
    # highlighted span instead of from every line.
    # processed_context_lines = context.lower().replace('\\n', '\n').replace('\n\n', '\n').split('\n')
    # payloads = generate_payload_by_highlighted(original_highlighted, processed_context_lines,
    #                                            entity_type=entity_type)
    # The last successful find-code response doubles as the skeleton of this endpoint's
    # own response; its 'results' are replaced with the merged, re-ranked list below.
    response = {}
    for payload in payloads:
        r = get_t2_find_code(payload)
        if r.status_code != 200:
            continue
        response = json.loads(r.content)
        for result in response['results']:
            # Record which query string produced each candidate.
            result['synonym'] = payload['concept_text']
        find_code_results.extend(response['results'])

    sorted_results = sorted(find_code_results, key=lambda x: x['confidence'], reverse=True)
    sorted_top_concept = sort_by_code_weight_with_same_parent(sorted_results)
    response['results'] = sorted_top_concept
    """
    if original_highlighted in ['', None]:
        selected_concept = sorted_top_concept[0].get('synonym', sorted_top_concept[0].get('preferred_terminology'))
        # selected_concept = sorted_top_concept[0].get('preferred_terminology', [sorted_top_concept[0].get('synonym')])[0]
    else:
        selected_concept = original_highlighted
    response_context_lines = (
        context.replace('\\n', '\n').replace('\n\n', '\n')
        .replace(';', '\n').replace(':', '\n').replace('.', ' . ').split('\n')
    )
    index = 0
    selected_concept_tokens = set(selected_concept.lower().replace(';', '').replace(':', '').split())
    for processed_context in processed_context_lines:
        processed_context_tokens = set(processed_context.split())
        if selected_concept_tokens.intersection(processed_context_tokens) == selected_concept_tokens:
            response_context_lines[index] = get_highlight_from_concept(response_context_lines[index], selected_concept)
        index += 1
    response['context'] = '<br>'.join(response_context_lines)
    """
    response['context'] = context
    # Expose every known code for this entity type so the UI can offer manual overrides.
    entity_codes = []
    if entity_type in med_terminology_code_verbose:
        for code, detail in med_terminology_code_verbose[entity_type].items():
            # Prefer the preferred term (PT) from the code tree; otherwise fall back to
            # the first synonym (SY) or semantic type (STY) in the verbose table.
            fallback = detail.get('SY', detail.get('STY'))[0]
            if code in med_terminology_code_tree:
                terminology = med_terminology_code_tree[code].get('PT', fallback)
            else:
                terminology = fallback
            entity_codes.append([code, terminology])
    response['entity_codes'] = entity_codes
    response['entity_type'] = entity_type
    response['extracted_code'] = extracted_code
    response['original_highlighted'] = original_highlighted
    response['current_process'] = api.selected_dataset
    response['message'] = "OK"
    if len(sorted_top_concept) == 0:
        response['match_with_extracted'] = False
    elif extracted_code:
        # Flag whether the top-ranked candidate agrees with the already-extracted code.
        response['match_with_extracted'] = extracted_code == sorted_top_concept[0]['code']
    jsonify_response = jsonify(response)
    api.dataset[context]['inferred'] = sorted_top_concept
    dataset_file_path = os.path.join(DATASET_DIR,
                                     api.selected_dataset.replace('.jsonl', '.data').replace('.json', '.data'))
    write_json(api.dataset, dataset_file_path)
    if not inprogress:
        # First touch of this context: move it from 'not_started' into processing.
        api.dataset_status[api.selected_dataset]['processing_dataset'] += 1
        api.dataset_status[api.selected_dataset]['not_started'] -= 1
    api.dataset_status['updated'] = datetime.now().strftime(DATETIME_FORMAT)
    dataset_status_file_path = os.path.join(DATASET_DIR, DATASET_STATUS_FILE)
    write_json(api.dataset_status, dataset_status_file_path)

    # 'status-code' is copied from the last successful upstream response; if every
    # find-code call failed, fall back to 400.
    return make_response(jsonify_response, response.get("status-code", 400))
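For reference, the fields infer_next_code assembles before returning; every key below is set in the code above, while the sample values are placeholders:

response = {
    'message': 'OK',
    'status-code': 200,                # copied from the last successful upstream response
    'results': [],                     # re-ranked candidates, each carrying a 'synonym' field
    'context': '...',                  # raw context text (the highlighting pass is disabled)
    'entity_codes': [['C01', 'sample term']],  # [code, terminology] pairs for this entity type
    'entity_type': '...',
    'extracted_code': '...',
    'original_highlighted': '...',
    'current_process': 'reports.jsonl',  # api.selected_dataset
    'match_with_extracted': True,        # only set when extracted_code exists or results are empty
}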