Example #1
0
def keydb_get_note(note,dbName='keydb.data'):
    '''
    Extract the cancer record from a single pathology note and hand it to
    keydb_core.

    Args:
        note: an actual pathology note from raw data.
        dbName: the physical storage for the database. Kept in the
            signature for callers; this function body does not read it.
    Returns:
        The result of keydb_core for the extracted record (described by the
        original author as a dictionary of key frequencies).
    '''
    # Resolved at call time rather than at module load.
    from get_data_breast import checkAllcancer, get_section
    # Specimen sections (get_section) are deliberately no longer attached
    # to the record before it is passed on.
    return keydb_core(checkAllcancer(note))
Example #2
0
def get_collection(data):
    '''
    Gather every extracted value, grouped by its cleaned key name.

    Args:
        data: sequence of rows; element 1 of each row is passed to
            checkAllcancer (presumably the note text -- confirm with caller).
    Returns:
        Dictionary mapping each cleaned key to the list of values found for
        it across all rows. Values are lower-cased with underscores removed;
        empty values and the "_" placeholder are left out.
    '''
    collection = {}
    for row in data:
        extracted = checkAllcancer(row[1])
        for item in extracted.values():
            for key, entry in item.items():
                cleaned = keydb_clean_string(key)
                if not cleaned:
                    # Skip only this key. The previous `break` dropped every
                    # remaining key of the section, and which ones were lost
                    # depended on dict iteration order.
                    continue
                # entry is a list whose first element is the extracted value.
                value = entry[0]
                if value == '' or value == "_":
                    continue
                normalized = value.replace("_", "").lower()
                collection.setdefault(cleaned[0], []).append(normalized)
    return collection
def get_collection(data):
    '''
    Collect all data after extraction so it can be used as the input for
    calculating the value score.

    Args:
        data: sequence of rows; element 1 of each row is passed to
            checkAllcancer (presumably the note text -- confirm with caller).
    Returns:
        Dictionary mapping each cleaned key to the list of values found for
        it across all rows. Values are lower-cased with underscores removed;
        empty values and the "_" placeholder are left out.
    '''
    from get_data_breast import checkAllcancer

    collection = {}
    for row in data:
        sections = checkAllcancer(row[1])
        for section in sections.values():
            for raw_key, entry in section.items():
                clean_keys = keydb_clean_string(raw_key)
                if not clean_keys:
                    # An unusable key must not stop the remaining keys of
                    # this section from being collected (was `break`, which
                    # made the result depend on dict iteration order).
                    continue
                # entry is a list whose first element is the extracted value.
                value = entry[0]
                if value == "" or value == "_":
                    continue
                bucket = collection.setdefault(clean_keys[0], [])
                bucket.append(value.replace("_", "").lower())
    return collection
Example #4
0
def Extract():
    '''
    depracted. Use jsontest instead. 
    this is a nice web interface for testing. 
    user only needs to input text and a univerid, 
    Returns: 
        json formated result from given text and universe_id
    '''
    args = parser.parse_args()
    note = args['text']
    cancerName = args['universe_id']
    print 'args:',args
    print 'form:',request.form
    #should ahve just check form first then the args. 
    if note == None:
        note = request.form.get('data')
        cancerName = request.form.get('cancer')
        
    if cancerName is None:
        cancerName = ''
        
    if note == None or note.strip()=='':
        return 'No info'
    else:
        result = {}
        try:
            result = checkAllcancer(note)
            result_confidence= result.copy()
            for cancer in result.keys():
                if cancerName.strip() != '':
                    marginaldbname = cancerName.lower()+'.data'
                else:
                    marginaldbname=None
                print 'marginaldbname: ',marginaldbname
                if marginaldbname is not None:
                    marginaldb = keydb_marginal_load(marginaldbname)
                else:
                    marginaldb = keydb_marginal_load()
                ########################################
                # the below code is for getting confidence score. 
                #########################################                
                for k,v in result[cancer].items():
                    #note that v is a list contains value and original value. 
                    value = v[0]
                    #now we can do value processing. 
                    #put your code here
                    
                    #now we can do key confidence processing. 
                    #it needs a library indicating which unverse it belongs to. 
                    #in here we will just try to use our pre-existing libraries. 
                    #namely, if you have breast cancer as cancer, then
                    try:
                        result_confidence[cancer][k].append(keydb_marginal_newkey(k,value,marginaldb,marginaldbname,True))
                    except Exception, err:
                        print 'ERROR: key_confidence failed'
                        print err
                    try:
                        value_score = getScore(k,value,keydb_marginal_load('Valdb.data'))
                        result_confidence[cancer][k].append(' '.join([str(item) for item in value_score.values()]))
                    except Exception, err:
                        print 'ERROR: value_confidence failed'
                        print err

            result['specimens']=get_section(note)