def _split_impression_text(text):
    """Break one impression text into sentence-like pieces.

    Three layouts are recognised, in this order of precedence:

    * numbered list ("1. ... 2. ..."): every digit directly followed by
      a '.' counts as an item marker, and one piece is produced per marker;
    * plain text with full stops: the text is split on every '.';
    * anything else (no '.', or a single '.' located in the last quarter
      of the text): the text is returned unchanged as a single piece.

    Pieces are not stripped here and may be empty strings.
    """
    size = len(text)
    # Stopping at size - 1 keeps text[i + 1] in range, so a trailing digit
    # needs no exception handling.
    item_marks = [i for i in range(size - 1)
                  if text[i].isdigit() and text[i + 1] == '.']

    if item_marks:
        pieces = []
        for n, mark in enumerate(item_marks):
            begin = mark + 2  # skip the digit and its '.'
            if n + 1 < len(item_marks):
                # End two characters before the next marker; presumably this
                # drops the ". " that closes the current item -- TODO confirm
                # against real report text.
                pieces.append(text[begin:item_marks[n + 1] - 2])
            else:
                # Last item runs to the end of the text.
                pieces.append(text[begin:])
        return pieces

    stops = [i for i, ch in enumerate(text) if ch == '.']
    if not stops or (len(stops) == 1 and stops[0] > 0.75 * size):
        return [text]

    pieces = []
    begin = 0
    for stop in stops:
        pieces.append(text[begin:stop])
        begin = stop + 1
    if begin < size:
        # Text after the final full stop is kept as its own piece.
        pieces.append(text[begin:])
    return pieces


def _tagged_metamap_phrases(sentences, metaMapBinDir, dt, color_code):
    """Run *sentences* through getMetamapResults and tag every result.

    Each sentence is ASCII-encoded and stripped before the call; every
    returned item gets its 'date' and 'colorCode' keys set.
    """
    # NOTE(review): under Python 3 encode() yields bytes objects -- confirm
    # that getMetamapResults expects that.
    cleaned = [sentence.encode("ascii").strip() for sentence in sentences]
    phrases = getMetamapResults(cleaned, metaMapBinDir)
    for phrase in phrases:
        phrase['date'] = dt
        phrase['colorCode'] = color_code
    return phrases


def jsonProcessor(dt, data, metaMapBinDir):
    """Split a report's impression texts into sentences and map them with MetaMap.

    NOTE(review): this file defines jsonProcessor more than once; the last
    definition wins at import time -- confirm which one is intended.

    Args:
        dt: value stored under the 'date' key of every returned phrase.
        data: sequence whose first element maps 'impression_negated' and
            'impression_positive' to the impression texts (presumably
            strings -- confirm against the caller).
        metaMapBinDir: passed through unchanged to getMetamapResults.

    Returns:
        The results for the negated text (colorCode 'neg') followed by the
        results for the positive text (colorCode 'pos'). An empty field is
        skipped, so the result is [] when both fields are empty.
        Presumably a list of dicts, as returned by getMetamapResults.

    Raises:
        UnicodeEncodeError: if a sentence contains non-ASCII characters.
    """
    record = data[0]
    negated_text = record['impression_negated']
    positive_text = record['impression_positive']

    # Both texts are split before MetaMap is invoked for either of them.
    negative_sentences = _split_impression_text(negated_text) if negated_text else []
    positive_sentences = _split_impression_text(positive_text) if positive_text else []

    negativePhrases = []
    positivePhrases = []
    if negated_text:
        negativePhrases = _tagged_metamap_phrases(
            negative_sentences, metaMapBinDir, dt, 'neg')
    if positive_text:
        positivePhrases = _tagged_metamap_phrases(
            positive_sentences, metaMapBinDir, dt, 'pos')

    return negativePhrases + positivePhrases
def _split_numbered_impression(text):
    """Split a numbered-list impression ("1. ... 2. ...") into its items.

    Every digit directly followed by a '.' counts as an item marker. Text
    without any marker is returned unchanged as a single piece.
    """
    # Stopping at len(text) - 1 keeps text[i + 1] in range, so text that
    # ends in a digit does not raise IndexError.
    item_marks = [i for i in range(len(text) - 1)
                  if text[i].isdigit() and text[i + 1] == '.']
    if not item_marks:
        return [text]

    pieces = []
    for n, mark in enumerate(item_marks):
        begin = mark + 2  # skip the digit and its '.'
        if n + 1 < len(item_marks):
            # End two characters before the next marker; presumably this
            # drops the ". " that closes the current item -- TODO confirm.
            pieces.append(text[begin:item_marks[n + 1] - 2])
        else:
            # Last item runs to the end of the text.
            pieces.append(text[begin:])
    return pieces


def _scored_metamap_phrases(sentences, metaMapBinDir, dt, color_code):
    """Run *sentences* through getMetamapResults and tag every result.

    Each sentence is ASCII-encoded and stripped before the call; every
    returned item gets its 'date' and 'colorCode' keys set.
    """
    cleaned = [sentence.encode("ascii").strip() for sentence in sentences]
    phrases = getMetamapResults(cleaned, metaMapBinDir)
    for phrase in phrases:
        phrase['date'] = dt
        phrase['colorCode'] = color_code
    return phrases


def jsonProcessor(dt, data, metaMapBinDir):
    """Map a report's impression texts with MetaMap and dump the result to JSON.

    NOTE(review): this file defines jsonProcessor more than once; the last
    definition wins at import time -- confirm which one is intended.

    Args:
        dt: value stored under the 'date' key of every returned phrase and
            used to build the output file name; must be a string because it
            is concatenated into that name.
        data: sequence whose first element maps 'impression_negated' and
            'impression_positive' to the impression texts (presumably
            strings -- confirm against the caller).
        metaMapBinDir: passed through unchanged to getMetamapResults.

    Returns:
        The results for the negated text (colorCode -1) followed by the
        results for the positive text (colorCode 1). An empty field is
        skipped. The same list is written to 'result<dt>.json' in the
        current working directory.

    Raises:
        UnicodeEncodeError: if a sentence contains non-ASCII characters.
    """
    record = data[0]
    negated_text = record['impression_negated']
    positive_text = record['impression_positive']

    # Positive text always feeds the positive list, including the case
    # where it is not a numbered list.
    negative_sentences = _split_numbered_impression(negated_text) if negated_text else []
    positive_sentences = _split_numbered_impression(positive_text) if positive_text else []

    negativePhrases = []
    positivePhrases = []
    if negated_text:
        negativePhrases = _scored_metamap_phrases(
            negative_sentences, metaMapBinDir, dt, -1)
    if positive_text:
        positivePhrases = _scored_metamap_phrases(
            positive_sentences, metaMapBinDir, dt, 1)

    completeList = negativePhrases + positivePhrases

    nameJson = 'result' + dt + '.json'
    with open(nameJson, 'w') as fp:
        json.dump(completeList, fp)

    return completeList
def _accession_metamap_phrases(sentences, metaMapBinDir, accession, color_code):
    """Run *sentences* through getMetamapResults and tag every result.

    Each sentence is ASCII-encoded and stripped before the call; every
    returned item gets its "accession" and "colorCode" keys set.
    """
    cleaned = [sentence.encode("ascii").strip() for sentence in sentences]
    phrases = getMetamapResults(cleaned, metaMapBinDir)
    for phrase in phrases:
        phrase["accession"] = accession
        phrase["colorCode"] = color_code
    return phrases


def jsonProcessor(accession, jsonObject, metaMapBinDir):
    """Classify the sentences of a report impression and map them with MetaMap.

    The sentences parsed from "impression_all" are divided into two groups:
    a sentence is negated when a phrase parsed from "impression_negated" is
    a substring of it or has a difflib similarity ratio above 0.95 with it;
    every other sentence is positive. Each negated phrase claims only the
    first sentence it matches.

    Args:
        accession: value stored under the "accession" key of every returned
            phrase.
        jsonObject: mapping with the keys "impression_all",
            "impression_negated" and "impression_positive" (presumably
            strings -- confirm against the caller).
        metaMapBinDir: passed through unchanged to getMetamapResults.

    Returns:
        The results for the negated sentences (colorCode "neg") followed by
        the results for the positive sentences (colorCode "pos"). Negated
        sentences are only processed when "impression_negated" is non-empty
        and positive sentences only when "impression_positive" is non-empty.

    Raises:
        UnicodeEncodeError: if a sentence contains non-ASCII characters.
    """
    has_negated = bool(jsonObject["impression_negated"])
    has_positive = bool(jsonObject["impression_positive"])

    # Newlines are flattened to spaces before the text is parsed.
    impressions_all = jsonObject["impression_all"].replace("\n", " ")
    sentenceList = parseImpressions(impressions_all)

    negatedSentences = []
    negated_indices = set()
    if has_negated:
        impression_negated = jsonObject["impression_negated"].replace("\n", " ")
        for phrase in parseImpressions(impression_negated):
            for index, sentence in enumerate(sentenceList):
                matches = (
                    phrase in sentence
                    or difflib.SequenceMatcher(None, sentence, phrase).ratio() > 0.95
                )
                if not matches:
                    continue
                # Several phrases can point at the same sentence; keep it
                # once so MetaMap results are not duplicated.
                if index not in negated_indices:
                    negated_indices.add(index)
                    negatedSentences.append(sentence)
                break

    # Every sentence not claimed by a negated phrase counts as positive.
    positiveSentences = [
        sentence
        for index, sentence in enumerate(sentenceList)
        if index not in negated_indices
    ]

    negativePhrases = []
    positivePhrases = []
    if has_negated:
        negativePhrases = _accession_metamap_phrases(
            negatedSentences, metaMapBinDir, accession, "neg")
    if has_positive:
        positivePhrases = _accession_metamap_phrases(
            positiveSentences, metaMapBinDir, accession, "pos")

    return negativePhrases + positivePhrases