def keydb_get_note(note,dbName='keydb.data'):
    '''
    Run field extraction on a single note and hand the result to keydb_core.

    Args:
        note: an actual pathology note from raw data.
        dbName: the physical storage for the database. This function's body
            does not read it; it is kept so existing callers keep working.
    Returns:
        Whatever keydb_core returns for the extracted record (described by
        the original author as a dictionary of key frequencies).
    '''
    from get_data_breast import checkAllcancer,get_section

    # Specimens are not important any more, so get_section is imported but
    # no 'content' entry is attached to the record before it is scored.
    extracted = checkAllcancer(note)
    return keydb_core(extracted)
def get_collection(data):
    '''
    Gather every extracted value, grouped by its cleaned key.

    NOTE(review): this file contains a second definition of get_collection
    after this one; if both live at module level, the later one replaces this
    one at import time. Confirm which is meant to survive.

    Args:
        data: sequence of records; the note text is read from index 1 of
            each record and passed to checkAllcancer.
    Returns:
        dict mapping the first element of keydb_clean_string(key) to a list
        of the values seen for it, lowercased and with underscores removed.
    '''
    # Local import, matching how the other helpers in this file obtain it.
    from get_data_breast import checkAllcancer

    collection = {}
    for record in data:
        extracted = checkAllcancer(record[1])
        # The extraction result is treated as {group: {key: [value, ...]}}.
        for fields in extracted.values():
            for raw_key, entry in fields.items():
                cleaned = keydb_clean_string(raw_key)
                if len(cleaned) == 0:
                    # Skip only this key. The previous `break` abandoned all
                    # remaining keys of the record, and which ones were lost
                    # depended on dict iteration order.
                    continue
                value = entry[0]
                # Empty strings and the bare "_" placeholder carry no value.
                if value == '' or value == "_":
                    continue
                normalized = value.replace("_", "").lower()
                collection.setdefault(cleaned[0], []).append(normalized)
    return collection
def get_collection(data):
    '''
    Collect all data after extraction, grouped by cleaned key, so it can be
    used as an input for calculating the value score.

    Args:
        data: sequence of records; the note text is read from index 1 of
            each record and passed to checkAllcancer.
    Returns:
        dict mapping the first element of keydb_clean_string(key) to a list
        of the values seen for it, lowercased and with underscores removed.
    '''
    from get_data_breast import checkAllcancer

    collection = {}
    for record in data:
        extracted = checkAllcancer(record[1])
        # The extraction result is treated as {group: {key: [value, ...]}}.
        for fields in extracted.values():
            for raw_key, entry in fields.items():
                cleaned = keydb_clean_string(raw_key)
                if len(cleaned) == 0:
                    # Skip only this key. The previous `break` abandoned all
                    # remaining keys of the record, and which ones were lost
                    # depended on dict iteration order.
                    continue
                value = entry[0]
                # Empty strings and the bare "_" placeholder carry no value.
                if value == "" or value == "_":
                    continue
                normalized = value.replace("_", "").lower()
                collection.setdefault(cleaned[0], []).append(normalized)
    return collection
def Extract(): ''' depracted. Use jsontest instead. this is a nice web interface for testing. user only needs to input text and a univerid, Returns: json formated result from given text and universe_id ''' args = parser.parse_args() note = args['text'] cancerName = args['universe_id'] print 'args:',args print 'form:',request.form #should ahve just check form first then the args. if note == None: note = request.form.get('data') cancerName = request.form.get('cancer') if cancerName is None: cancerName = '' if note == None or note.strip()=='': return 'No info' else: result = {} try: result = checkAllcancer(note) result_confidence= result.copy() for cancer in result.keys(): if cancerName.strip() != '': marginaldbname = cancerName.lower()+'.data' else: marginaldbname=None print 'marginaldbname: ',marginaldbname if marginaldbname is not None: marginaldb = keydb_marginal_load(marginaldbname) else: marginaldb = keydb_marginal_load() ######################################## # the below code is for getting confidence score. ######################################### for k,v in result[cancer].items(): #note that v is a list contains value and original value. value = v[0] #now we can do value processing. #put your code here #now we can do key confidence processing. #it needs a library indicating which unverse it belongs to. #in here we will just try to use our pre-existing libraries. #namely, if you have breast cancer as cancer, then try: result_confidence[cancer][k].append(keydb_marginal_newkey(k,value,marginaldb,marginaldbname,True)) except Exception, err: print 'ERROR: key_confidence failed' print err try: value_score = getScore(k,value,keydb_marginal_load('Valdb.data')) result_confidence[cancer][k].append(' '.join([str(item) for item in value_score.values()])) except Exception, err: print 'ERROR: value_confidence failed' print err result['specimens']=get_section(note)