def test_extractNER_date(self):
    # Tag a short biography passage and check the labels assigned to a few
    # selected tokens. Note: despite the test name, only LOCATION, PERSON and
    # ORGANIZATION labels are asserted here; no date label is checked.
    passage = (
        "Jordan was born in Ponchatoula, Louisiana, to a working class family, "
        "and received his BS magna cum laude in Psychology in 1978 from the "
        "Louisiana State University, his MS in Mathematics in 1980 from the "
        "Arizona State University and his PhD in Cognitive Science in 1985 "
        "from the University of California, San Diego. At the University of "
        "California, San Diego Jordan was a student of David Rumelhart and a "
        "member of the PDP Group in the 1980s."
    )
    labels_by_token = stanford_extract_ner_series(tokenize(passage))

    # (token, expected label) pairs, checked in this order.
    expectations = (
        ("Ponchatoula", "LOCATION"),
        ("David", "PERSON"),
        ("PDP", "ORGANIZATION"),
    )
    for token, expected_label in expectations:
        self.assertEqual(labels_by_token[token], expected_label)
def stanford_extract_ner(text):
    """Group the named entities found in *text* by their entity label.

    The text is split with the module-level ``tokenize`` helper and tagged
    with the module-level tagger ``st`` (presumably a Stanford NER tagger
    whose ``tag`` returns ``(token, label)`` pairs -- confirm against the
    tagger setup). Tokens labelled ``'O'`` are treated as "no entity".

    Consecutive tokens carrying the same non-``'O'`` label are merged into a
    single space-separated string, with ASCII punctuation stripped from each
    token first.

    Args:
        text: Raw input text to analyse.

    Returns:
        A ``defaultdict(list)`` mapping each entity label to the list of
        entity strings found for it, in order of appearance within that
        label. Because it is a ``defaultdict``, looking up an absent label
        inserts and returns an empty list.
    """
    tagged_tokens = st.tag(tokenize(text))
    entities = defaultdict(list)

    # Built once so the per-character membership test below is O(1).
    punctuation = frozenset(string.punctuation)

    # Label of the previous token; 'O' means "not inside an entity".
    previous_label = 'O'
    for item in tagged_tokens:
        word, label = item[0], item[1]
        if label != 'O':
            # Strip punctuation with a plain character filter. The former
            # ``translate(string.maketrans("", ""), string.punctuation)``
            # call only exists on Python 2; this form works on 2 and 3.
            cleaned = "".join(
                ch for ch in str(word) if ch not in punctuation
            )
            if previous_label != label:
                # Previous token was outside an entity or had a different
                # label: start a new entity string.
                entities[label].append(cleaned)
            else:
                # Same label as the previous token: extend the entity that
                # is currently being built.
                entities[label][-1] = entities[label][-1] + " " + cleaned
        # Updated for every token (including 'O') so that two entities of the
        # same label separated by non-entity tokens are kept apart.
        previous_label = label
    return entities