예제 #1
0
def all_files(dataset,  mongo):
    """Annotate every card of ``dataset`` with the 'CHF' formula.

    For each index returned by ``get("all_indexes", ...)`` a document
    dictionary is built with ``create_dict``, processed by ``all_steps`` and
    its ``doc_data['data']`` is stored with ``put("annotations", ...)``.
    Indexes of cards whose data contains the key 'ICHF' are collected and
    stored with ``put("calculated_indexes", ...)``.

    Args:
        dataset: dataset identifier forwarded to ``get``/``put``.
        mongo: storage handle forwarded to ``get``/``put``.
    """
    indexes = get("all_indexes", dataset=dataset, mongo=mongo)
    # The formula code does not depend on the card, so it is fetched once
    # instead of once per loop iteration.
    code = get("code.json", formula="CHF", mongo=mongo)
    chf = []
    for number_of_card in indexes:
        print('Card #' + number_of_card)
        doc_data = create_dict(dataset, number_of_card, mongo)
        all_steps(code, json_interpretator, doc_data, dataset,
                  number_of_card, mongo)
        put("annotations", doc_data['data'],
            dataset=dataset, number_of_card=number_of_card,
            formula='CHF', mongo=mongo)
        if 'ICHF' in doc_data['data']:
            chf.append(number_of_card)
    put("calculated_indexes", chf, formula='CHF',
        dataset=dataset, mongo=mongo)
    print(str(len(chf)) + ' documents were annotated by ICHF.')
예제 #2
0
 def __init__(self, args, mongo, httpd):
     """Run a formula step by step and store 'ready' in ``self.site``.

     ``args['args']`` is a URL-quoted JSON request; the keys read here are
     'formula', 'ds' and 'id'.  ``next_step`` is called with increasing step
     numbers until it returns None; after every step the list of all
     intermediate documents is rewritten to ``snap_file_name``.

     Args:
         args: request arguments; only ``args['args']`` is used.
         mongo: storage handle forwarded to ``get`` and ``next_step``.
         httpd: server object; its ``mLock`` guards the ``get`` call.
     """
     req = json.loads(urllib.unquote(args['args']))
     print('req' + str(req))
     lock = httpd.mLock
     lock.acquire()
     try:
         code = get("code.cla.json", formula=req['formula'], mongo=mongo)
         print('runCode version: ' + code['version'])
     finally:
         # Release the lock even when the lookup fails, otherwise every
         # later request would block on it.
         lock.release()
     doc = []
     doc_data = {}
     n = 0
     while True:
         print('Step: ' + str(n))
         doc_data = next_step(doc_data, code, req['ds'], req['id'], n,
                              mongo)
         if doc_data is None:
             break
         # NOTE(review): the same object may be appended repeatedly if
         # next_step mutates doc_data in place -- confirm whether a deep
         # copy is needed here.
         doc.append(doc_data)
         # Serialise first so a failure does not leave a truncated file.
         snapshot_text = json.dumps(doc, indent=4)
         with open(snap_file_name, 'w') as snap_file:
             snap_file.write(snapshot_text)
         n += 1
     answer = 'ready'
     self.site = urllib.quote(json.dumps(answer, indent=4))
예제 #3
0
    def generator_of_chunks(self, text, mongo, lock):
        """Return the ``/html/body/p`` nodes of a document as strings.

        When ``text`` starts with 'Doc #', the rest of ``text`` is used as a
        card number and the stored "doc.html" of dataset 'cci' is loaded;
        otherwise ``text`` is passed to ``self.split_to_chunks``.  The markup
        is written to a temporary file, parsed back with an lxml HTML parser
        (comments removed) and every paragraph node is serialised.

        Args:
            text: either 'Doc #<card number>' or raw text.
            mongo: storage handle forwarded to ``get``.
            lock: lock held around the ``get`` call.

        Returns:
            A list of serialised ``<p>`` nodes, or None when the stored
            document does not exist or the temporary file cannot be read.
        """
        if text[:5] == 'Doc #':
            number_of_card = text[5:]
            lock.acquire()
            try:
                nodes = get("doc.html",
                            number_of_card=number_of_card,
                            dataset='cci',
                            mongo=mongo)
            finally:
                # Never leave the shared lock held if the lookup raises.
                lock.release()
            if nodes is None:
                return None
            doc = '\n'.join(nodes)
        else:
            doc = self.split_to_chunks(text, lock)

        # The temporary file location depends on the machine the code runs on.
        if socket.gethostname() == 'noX540LJ':
            tmp_file = 'tmp/formula.cla'
        else:
            tmp_file = '/home/andrey/work/Claudia/claudia/tmp/formula.cla'
        with open(tmp_file, 'w') as out:
            out.write(doc)
        try:
            with open(tmp_file, 'rb') as inp:
                sHTML_Parser = etree.HTMLParser(remove_comments=True)
                tree = etree.parse(inp, sHTML_Parser)
                nodes = tree.xpath('/html/body/p')
        except IOError:
            print('No such file or directory: ' + tmp_file)
            return None
        return [etree.tostring(node) for node in nodes]
예제 #4
0
def create_dict(dataset,  number_of_card,  mongo):
    """Convert the stored "doc.html" of one card into a document dictionary.

    Every item returned by ``get("doc.html", ...)`` is parsed as XML and
    becomes one sentence.  Each ``/p/span/span`` element contributes its
    ``/span/span/span`` descendants as chunks; a chunk carries the element
    text and a '__negation' value taken from the ``class`` attribute of the
    enclosing ``/p/span/span`` element with its first six characters dropped.

    Returns:
        ``{'data': {}, 'sentences': [{'data': {}, 'chunks': [...]}, ...]}``
    """
    result = {'data': {}, 'sentences': []}
    paragraphs = get("doc.html", dataset=dataset,
                     number_of_card=number_of_card, mongo=mongo)
    for markup in paragraphs:
        entry = {'data': {}, 'chunks': []}
        paragraph = etree.fromstring(markup)
        # Serialise and re-parse so the element is the root of its own
        # document; the XPath expressions below are absolute.
        paragraph_root = etree.fromstring(etree.tostring(paragraph))
        for level_node in paragraph_root.xpath('/p/span/span'):
            level_attrs = level_node.attrib
            level_root = etree.fromstring(etree.tostring(level_node))
            for leaf in level_root.xpath('/span/span/span'):
                entry['chunks'].append({
                    'text': leaf.text,
                    'data': {'__negation': level_attrs['class'][6:]},
                })
        result['sentences'].append(entry)
    return result
예제 #5
0
def taxonomy(text, tax, mongo):
    """Annotate ``text`` with the entries of taxonomy ``tax`` it contains.

    The taxonomy file ("tax.tset") is read line by line; only lines starting
    with a double quote are considered.  The first quoted field is the term:
    when ``is_word`` finds it in the whitespace-normalised text, the result
    gets ``result[tax] = text`` and the remaining quoted fields of the line
    are consumed as alternating key / value entries.

    Args:
        text: chunk text to inspect.
        tax: taxonomy name; also used as a key of the result.
        mongo: storage handle forwarded to ``get``.

    Returns:
        A dict of annotations; empty when no term matches.
    """
    found = {}
    # Raw string: "\s" in a normal string literal is an invalid escape.
    whitespace = re.compile(r"\s+", re.M + re.I + re.U)
    tax_lines = get("tax.tset", taxonomy=tax, mongo=mongo).split('\n')
    # The text is the same for every taxonomy line, so normalise it once.
    normalized_text = whitespace.sub(' ', text)
    for line in tax_lines:
        if not line.startswith('"'):
            continue
        words = line.split('"')
        term = whitespace.sub(' ', words[1])
        if not is_word(term, normalized_text):
            continue
        found[tax] = text
        key = ''
        # After splitting on '"', odd positions hold the quoted fields and
        # even positions the separators between them; words[1] is the term,
        # so the attribute fields are words[3], words[5], ...
        for word in words[3::2]:
            field = whitespace.sub(' ', word)
            if key == '':
                key = field
            else:
                found[key] = field
                key = ''
    return found
def all_files(dataset, formula, mongo):
    """Apply ``formula`` to every card of ``dataset`` and store the results.

    Per card: build the document dictionary, run ``all_steps`` on it and
    store ``doc_data['data']`` under "annotations".  Cards whose
    'Formula diagnose' differs from 'No' are collected in one list; every
    card is also filed under its 'value' when that value is one of
    ``apostriory``, otherwise under 'not mentioned'.  Both collections are
    stored ("calculated_indexes", "results_apostriory") and the second one
    is printed.
    """
    diagnosed = []
    apost_res = {diag: [] for diag in apostriory}
    indexes = get("all_indexes", dataset=dataset, mongo=mongo)
    code = get("code.cla.json", formula=formula, mongo=mongo)
    for number_of_card in indexes:
        print('Card #' + number_of_card)
        doc_data = create_dict(dataset, number_of_card, mongo)
        all_steps(code, doc_data, dataset, number_of_card, mongo, False)
        annotations = doc_data['data']
        put("annotations", annotations, dataset=dataset,
            number_of_card=number_of_card, formula=formula, mongo=mongo)

        diagnose = annotations['Formula diagnose']
        if diagnose != 'No':
            diagnosed.append(number_of_card)
            print(diagnose)

        # Pick the a-posteriori bucket: the card's value when it is a known
        # one, the catch-all bucket otherwise.
        bucket = 'not mentioned'
        if 'value' in annotations and annotations['value'] in apostriory:
            bucket = annotations['value']
        apost_res[bucket].append(number_of_card)

    put("calculated_indexes", diagnosed, formula=formula, dataset=dataset,
        mongo=mongo)
    put("results_apostriory", apost_res, formula=formula, dataset=dataset,
        mongo=mongo)
    print('Apostriory: ' + json.dumps(apost_res, indent=4))
예제 #7
0
def next_step(code, dataset, number_of_card,  step_id,  mongo):
    """Execute the statement with id ``step_id`` on the card's snapshot.

    For step 0 a fresh document dictionary is built, passed through ``INA``
    and snapshotted first.  The current snapshot ("snap.json") is then
    loaded (falling back to a fresh dictionary when there is none), the
    first statement of ``code['statements']`` whose 'statementId' equals
    ``step_id`` is interpreted, and the result is snapshotted again.

    The function has no return statement: the outcome is only available
    through the stored snapshot.
    """
    if step_id == 0:
        initial = INA(create_dict(dataset, number_of_card, mongo), mongo)
        snapshot(dataset, number_of_card, initial, mongo)

    doc_data = get("snap.json", dataset=dataset,
                   number_of_card=number_of_card, mongo=mongo)
    if doc_data is None:
        doc_data = create_dict(dataset, number_of_card, mongo)

    for statement in code['statements']:
        if 'statementId' not in statement:
            continue
        if statement['statementId'] == step_id:
            doc_data = json_interpretator(doc_data, dataset, number_of_card,
                                          statement, mongo)
            # Only the first matching statement is executed.
            break

    snapshot(dataset, number_of_card, doc_data, mongo)
예제 #8
0
    def __init__(self, args, mongo, httpd):
        """Compute missing formula steps and return the requested ones.

        ``args['args']`` is a URL-quoted JSON request; the keys read here
        are 'ticket', 'formula', 'ds', 'id', 'step' and 'new_step'.  The
        list of already computed steps is loaded from ``snap_file_name``
        for the 'admin' ticket and from ``httpd.cch`` otherwise, extended
        up to ``new_step`` with ``next_step`` and stored back.
        ``self.site`` receives the URL-quoted JSON of the steps after
        ``req['step']``.

        Args:
            args: request arguments; only ``args['args']`` is used.
            mongo: storage handle forwarded to ``get`` and ``next_step``.
            httpd: server object providing ``mLock`` and ``cch``.
        """
        req = json.loads(urllib.unquote(args['args']))
        print('req: ' + str(req))

        lock = httpd.mLock
        is_admin = req['ticket'] == 'admin'

        # Every critical section releases the lock in ``finally`` so that
        # an I/O or storage error cannot leave it held.
        lock.acquire()
        try:
            if is_admin:
                with open(snap_file_name, 'r') as snap_file:
                    doc = json.loads(snap_file.read())
            else:
                cch = httpd.cch
                doc = cch.getValue(req['ticket'])
                if doc is None:
                    doc = []
        finally:
            lock.release()

        # Continue from the last computed step, or from scratch.
        doc_data = doc[-1] if doc else {}

        lock.acquire()
        try:
            code = get("code.cla.json", formula=req['formula'], mongo=mongo)
            print('GetCode version: ' + code['version'])
        finally:
            lock.release()

        for n in range(len(doc), req['new_step'] + 1):
            print('Step: ' + str(n))
            doc_data = next_step(doc_data, code, req['ds'], req['id'], n,
                                 mongo)
            # Store a copy so later steps cannot alter earlier entries.
            doc.append(copy.deepcopy(doc_data))
        new_cadres = doc[req['step'] + 1:]

        lock.acquire()
        try:
            if is_admin:
                # Serialise before opening so a failure does not truncate
                # the existing snapshot file.
                snapshot_text = json.dumps(doc, indent=4)
                with open(snap_file_name, 'w') as snap_file:
                    snap_file.write(snapshot_text)
            else:
                cch.putValue(req['ticket'], doc)
        finally:
            lock.release()
        self.site = urllib.quote(json.dumps(new_cadres))
예제 #9
0
def IsNumericAnnotator(text, mongo):
    """Annotate ``text`` with a measure unit and/or a numeric value.

    Measure: every quoted term of the "DOSAGE" taxonomy is searched in the
    whitespace-normalised text; a hit counts only when the characters
    directly before and after it are not ASCII letters.  The last matching
    term is stored under 'measure'.

    Number: the first number found in ``text`` is stored as a float under
    'value' with 'class' = 'numeric'; 'type' is 'number' when the whole text
    is that number and 'contains_number' otherwise.

    Args:
        text: chunk text to inspect.
        mongo: storage handle forwarded to ``get``.

    Returns:
        A dict with any of the keys 'measure', 'class', 'type', 'value'.
    """
    annotation = {}
    # Find a measure
    tax_file = get("tax.tset", taxonomy="DOSAGE", mongo=mongo).split('\n')
    # Raw string: "\s" in a normal string literal is an invalid escape.
    whitespace = re.compile(r"\s+", re.M + re.I + re.U)
    # The text does not change between taxonomy lines; normalise it once.
    normalized_text = whitespace.sub(' ', text)
    for line in tax_file:
        if not line.startswith('"'):
            continue
        unit = whitespace.sub(' ', line.split('"')[1])
        pos = normalized_text.find(unit)
        if pos == -1:
            continue
        end = pos + len(unit)
        # One character on each side of the hit.  For pos == 0 the slice
        # [-1:0] is empty, so there is no left neighbour.
        neighbours = normalized_text[pos - 1:pos] + normalized_text[end:end + 1]
        # [A-Za-z] instead of [A-z]: the latter also covers the punctuation
        # characters between 'Z' and 'a' in ASCII ([ \ ] ^ _ `), so a unit
        # written as "[mg]" was rejected.
        if re.search(r'[A-Za-z]', neighbours) is None:
            annotation['measure'] = unit

    # In 'text' there is a number
    # NOTE(review): the optional sign binds only to the decimal alternative,
    # so "-5" yields 5.0 -- confirm whether that is intended.
    result = re.search(r"[-+]?\d*\.\d+|\d+", text)
    if result is None:
        return annotation
    number = result.group(0)
    annotation['class'] = 'numeric'
    # Either the text is exactly the number, or it merely contains one.
    annotation['type'] = 'number' if number == text else 'contains_number'
    annotation['value'] = float(number)
    return annotation
예제 #10
0
def start_annotate(mongo):
    """Annotate every chunk of every card in all ``datasets``.

    Each chunk's 'data' is updated with the result of ``taxonomy`` for
    every entry of ``taxes`` and then with ``IsNumericAnnotator``.  Per
    card the annotated document ("ch.json") and the list of taxonomies
    that matched ("key_words") are stored; per dataset the card list of
    every taxonomy ("tax.idx") is stored.  ``taxes[tax]`` is extended in
    place with the card number unless it is already the last element.
    """
    for dataset in datasets:
        for number_of_card in get("all_indexes", dataset=dataset,
                                  mongo=mongo):
            print(dataset + ": card #" + number_of_card)
            doc_data = create_dict(dataset, number_of_card, mongo)
            key_words = set()
            for sentence in doc_data["sentences"]:
                for chunk in sentence["chunks"]:
                    chunk_text = chunk["text"]
                    for tax in taxes:
                        found = taxonomy(chunk_text, tax, mongo)
                        chunk["data"].update(found)
                        if found == {}:
                            continue
                        key_words.add(tax)
                        cards = taxes[tax]
                        # Avoid listing the same card twice in a row.
                        if cards == [] or cards[-1] != number_of_card:
                            cards.append(number_of_card)
                    # NOTE(review): a one-element tuple, not ``mongo``
                    # itself, is passed as the second argument -- confirm
                    # this matches what IsNumericAnnotator expects.
                    par = (mongo, )
                    chunk["data"].update(IsNumericAnnotator(chunk_text, par))
            put("ch.json", doc_data, dataset=dataset,
                number_of_card=number_of_card, mongo=mongo)
            put("key_words", list(key_words), dataset=dataset,
                number_of_card=number_of_card, mongo=mongo)
        for tax in taxes:
            put('tax.idx', taxes[tax], taxonomy=tax, dataset=dataset,
                mongo=mongo)
예제 #11
0
    def __init__(self, args, mongo, httpd):
        """Collect everything the client needs to show one card.

        ``args['args']`` is a URL-quoted JSON state; the keys read here are
        'formula', 'id', 'ds' and 'ticket'.  The state is extended with the
        formula source ('code'), the card's key words, its initial document,
        its annotations and its info record, and with a ticket.
        ``self.site`` receives the URL-quoted JSON of the state.

        Args:
            args: request arguments; only ``args['args']`` is used.
            mongo: storage handle forwarded to ``get``.
            httpd: server object providing ``mLock`` and ``cch``.
        """
        state = json.loads(urllib.unquote(args['args']))
        print(json.dumps(state, indent=4))

        lock = httpd.mLock

        def locked_get(name, **params):
            # One storage read under the server lock; the lock is released
            # even when get() raises.
            lock.acquire()
            try:
                return get(name, mongo=mongo, **params)
            finally:
                lock.release()

        # Code
        code = locked_get("code.cla.json", formula=state['formula'])
        print('getInfo version: ' + code['version'])
        state['code'] = [
            {
                'text': step['text'],
                'id': step['source_id'],
                'changes': -1,
                'visible': False,
            }
            for step in code['source']
        ]

        card = {'number_of_card': state['id'], 'dataset': state['ds']}

        # Key words
        state['key_words'] = locked_get('key_words', **card)

        # Initial document
        state['initial_doc'] = locked_get('doc.html', **card)

        # Annotations
        state['anns'] = locked_get('ch.json', **card)

        # Info: a missing record is reported as an empty dict.
        info = locked_get('doc.json', **card)
        state['info'] = {} if info is None else info

        # Ticket: non-admin requests get a free ticket, falling back to
        # 'admin' when none is available.
        lock.acquire()
        try:
            cch = httpd.cch
            if state['ticket'] != 'admin':
                state['ticket'] = cch.getFreeTicket()
                if state['ticket'] is None:
                    state['ticket'] = 'admin'
        finally:
            lock.release()
        print('ticket: ' + state['ticket'])

        self.site = urllib.quote(json.dumps(state))
예제 #12
0
    def __init__(self, args, mongo, httpd):
        """Build one page of the card list for the current selection.

        ``args['args']`` is a URL-quoted JSON state; the keys read here are
        'ds', 'formula', 'selected_cells', 'tax', 'portion' and 'list'.
        Steps:

        1. For every pivot-table cell, intersect the a-posteriori card list
           of its column with the a-priori list of its row (or with all
           cards when the dataset is not 'cci'), store the size in the
           cell's 'count' and add the cards of selected cells to the result.
        2. Keep (``tax['flag']`` false) or drop (true) the cards of the
           chosen taxonomy.
        3. Sort the card numbers numerically and cut out the requested
           portion of 100 cards.
        4. Append id, size, diagnoses and HTML-escaped abstract of every
           card of the portion to ``state['list']``.

        ``self.site`` receives the URL-quoted JSON of the state.

        Args:
            args: request arguments; only ``args['args']`` is used.
            mongo: storage handle forwarded to ``get``.
            httpd: server object; its ``mLock`` guards every ``get`` call.
        """
        state = json.loads(urllib.unquote(args['args']))
        lock = httpd.mLock

        def locked_get(name, **params):
            # One storage read under the server lock; the lock is released
            # even when get() raises.
            lock.acquire()
            try:
                return get(name, mongo=mongo, **params)
            finally:
                lock.release()

        dataset = state['ds']

        # Find all cards selected in pivot table
        ids = locked_get('all_indexes', dataset=dataset)
        need_list = []
        for i, row in enumerate(state['selected_cells']):
            ids3 = locked_get('results_apriory.' + apriory[i],
                              dataset=dataset,
                              formula=state['formula'])
            print('Apriory (' + apriory[i] + '): ' + str(ids3))
            # Only the 'cci' dataset is restricted by the a-priori lists.
            if dataset != 'cci':
                ids3 = ids
            for j, cell in enumerate(row):
                list_apostriory = locked_get(
                    'results_apostriory.' + apostriory[j],
                    dataset=dataset,
                    formula=state['formula'])
                print('Apostriory (' + apostriory[j] + '): ' +
                      str(list_apostriory))
                cell_cards = intersection(list_apostriory, ids3)
                if cell['selected']:
                    need_list = union(need_list, cell_cards)
                cell['count'] = len(cell_cards)

        # Find cards with the taxonomy only
        tax = state['tax']
        if tax['tax'] == 'None':
            ids1 = ids
        else:
            ids1 = locked_get("tax.idx", dataset=dataset,
                              taxonomy=tax['tax'])
        if not tax['flag']:
            need_list = intersection(need_list, ids1)
        else:
            need_list = difference(need_list, ids1)

        # Sort list of cards numerically (ids are numeric strings).
        need_list = [str(number)
                     for number in sorted(int(card_id)
                                          for card_id in need_list)]
        state['count'] = len(need_list)

        # Data of every card
        cards_in_one_portion = 100
        # NOTE(review): a portion starting exactly at len(need_list) is not
        # reset and yields an empty page -- confirm whether '>=' is wanted.
        if state['portion'] * cards_in_one_portion > len(need_list):
            state['portion'] = 0
        first = state['portion'] * cards_in_one_portion
        cut_need_list = need_list[first:first + cards_in_one_portion]

        chf = {}
        for stat in apriory:
            chf[stat] = locked_get("results_apriory." + stat,
                                   dataset=dataset,
                                   formula=state['formula'])
        for card_id in cut_need_list:
            card = {'id': card_id}
            card['size'] = str(locked_get("size_of_doc",
                                          dataset=dataset,
                                          number_of_card=card_id))
            card['diagnosis'] = [state['formula'] + '-' + stat
                                 for stat in apriory
                                 if card_id in chf[stat]]
            abstract = locked_get('abstract',
                                  dataset=dataset,
                                  number_of_card=card_id)
            # '&' must be escaped first; escaping it last would turn the
            # freshly produced '&lt;' / '&gt;' into '&amp;lt;' / '&amp;gt;'.
            abstract = abstract.replace('&', '&amp;')
            abstract = abstract.replace('<', '&lt;')
            abstract = abstract.replace('>', '&gt;')
            card['abstract'] = abstract
            state['list'].append(card)
        self.site = urllib.quote(json.dumps(state))
예제 #13
0
    def __init__(self, args, mongo, httpd):
        """Apply a formula to one document and report the diagnosis.

        ``args['args']`` is a URL-quoted JSON request; the keys read here
        are 'formula', 'doc' (URL-quoted text or 'Doc #<card number>') and
        'ticket'.  Progress is published in ``httpd.results[ticket]`` while
        the document is split into chunks and every step of the formula is
        applied with ``next_step``.  The final state is stored with
        ``httpd.cch.putValue`` and ``self.site`` receives the URL-quoted
        JSON result: 'formula', 'diagnose', optionally 'apriory', and the
        'sentences' that carry at least two data entries.

        When the document cannot be loaded, or free text is submitted on
        the host named 'noX540LJ', ``self.site`` is set to a plain message
        and processing stops.

        Args:
            args: request arguments; only ``args['args']`` is used.
            mongo: storage handle forwarded to ``get`` and ``next_step``.
            httpd: server object providing ``mLock``, ``results``, ``cch``.
        """
        print('Run redactor.')
        lock = httpd.mLock

        req = json.loads(urllib.unquote(args['args']))
        formula = req['formula']
        text = urllib.unquote(req['doc'])
        ticket = req['ticket']

        def locked_get(name, **params):
            # One storage read under the server lock; the lock is released
            # even when get() raises.
            lock.acquire()
            try:
                return get(name, mongo=mongo, **params)
            finally:
                lock.release()

        def publish(progress):
            # Make the current progress visible to other requests.
            lock.acquire()
            try:
                httpd.results[ticket] = progress
            finally:
                lock.release()

        code = locked_get('code.cla.json', formula=formula)

        state = {}
        state['formula'] = formula
        state['step'] = 'Generation of chunks...'
        publish(state)
        print('state: ' + str(state))
        doc = self.generator_of_chunks(text, mongo, lock)
        if doc is None:
            self.site = 'File ' + text[5:] + ' is not found.'
            return

        state['step'] = 'Compile the formula...'
        publish(state)
        print('state: ' + str(state))
        computer = socket.gethostname()
        if computer == 'noX540LJ' and text[:5] != 'Doc #':
            self.site = 'It is not a server.'
            return

        doc_data = create_dict_by_doc(doc)
        state['count_of_steps'] = code['count_of_steps']
        for n in range(code['count_of_steps'] + 1):
            lock.acquire()
            try:
                state['step'] = 'Apply the formula...'
                state['current_step'] = n
                print('Step: ' + str(n))
                httpd.results[ticket] = state
            finally:
                lock.release()
            doc_data = next_step(doc_data, code, None, None, n, mongo)

        res = {}
        res['formula'] = formula
        if 'value' in doc_data['data']:
            res['diagnose'] = formula + '-' + doc_data['data']['value']
        else:
            res['diagnose'] = formula + ' - ' + 'not mentioned'
        if text[:5] == 'Doc #':
            js = locked_get('doc.json',
                            number_of_card=text[5:],
                            dataset='cci')
            print('state: ' + str(state))
            # A card may have no info record; treat that as "no a-priori
            # diagnosis" instead of failing on iteration over None.
            if js is not None:
                for key in js:
                    if key.find(formula) != -1:
                        res['apriory'] = str(key)

        res['sentences'] = []
        for sentence in doc_data['sentences']:
            # Sentences with fewer than two data entries are not reported.
            if len(sentence['data']) < 2:
                continue
            attr = ''.join(key + ': ' + sentence['data'][key] + '; '
                           for key in sentence['data']
                           if key != 'reject')
            sent = {}
            sent['attr'] = attr
            sent['sent'] = ''.join(chunk['text'] + ' '
                                   for chunk in sentence['chunks'])
            res['sentences'].append(sent)

        state['step'] = 'Ready.'
        state['res'] = res
        lock.acquire()
        try:
            cch = httpd.cch
            cch.putValue(ticket, state)
        finally:
            lock.release()
        print('state: ' + '<document>')
        self.site = urllib.quote(json.dumps(res))
예제 #14
0
        message = 'Line ' + str(ret['line']) + ': ' + ret['message']
        print(message)
        sys.exit()
    else:
        code = {}
        code['rulename'] = claudia_file_name
        code['version'] = '0.1'
        code['declarations'] = negation
        code['statements'] = ret['action']
        code['source'] = ret['source']
        code['count_of_steps'] = len(ret['steps'])
        #code['annotations'] = ret['annotations']
        for source in code['source']:
            #print('source-id: ' + str(source['source_id']))
            while source['text'].find('  ') != -1:
                source['text'] = source['text'].replace('  ', ' ')
        return code


if __name__ == '__main__':
    # Compile the 'CHF' and 'MI' formulas: read each source from file,
    # compile it, write the result as JSON under cci/claudia_rules/ and
    # store it in the database as "code.cla.json".
    mongo = connect()
    for claudia_file_name in ['CHF', 'MI']:
        claudia = get('code.cla', formula=claudia_file_name, from_file=True)
        code = start_compilator(claudia, claudia_file_name)
        # Serialise before opening the file so that a serialisation error
        # cannot leave a truncated rule file behind; the context manager
        # closes the file even if the write fails.
        compiled_json = json.dumps(code, indent=4)
        with open('cci/claudia_rules/' + claudia_file_name + '.cla.json',
                  'w') as rule_file:
            rule_file.write(compiled_json)
        put('code.cla.json', code, formula=claudia_file_name, mongo=mongo)
    print('Ok.')
def create_dict(dataset, patient, mongo):
    """Build the document dictionary of one stored card.

    Loads the card's "doc.html" (``patient`` is passed as the card number)
    and hands it to ``create_dict_by_doc``, whose result is returned.
    """
    return create_dict_by_doc(
        get("doc.html", dataset=dataset, number_of_card=patient, mongo=mongo))