Ejemplo n.º 1
0
    def test_extractNER_date(self):
        # Runs stanford_extract_ner_series over a tokenized biography snippet
        # and checks the tag reported for three of its tokens.
        passage = "Jordan was born in Ponchatoula, Louisiana, to a working class family, and received his BS magna cum laude in Psychology in 1978 from the Louisiana State University, his MS in Mathematics in 1980 from the Arizona State University and his PhD in Cognitive Science in 1985 from the University of California, San Diego. At the University of California, San Diego Jordan was a student of David Rumelhart and a member of the PDP Group in the 1980s."

        tags = stanford_extract_ner_series(tokenize(passage))

        # (token, expected tag) pairs, checked in this order.
        expected_tags = (
            ("Ponchatoula", "LOCATION"),
            ("David", "PERSON"),
            ("PDP", "ORGANIZATION"),
        )
        for token, expected in expected_tags:
            self.assertEqual(tags[token], expected)
def stanford_extract_ner(text):
    """Group the tagged tokens of *text* into entity phrases, keyed by tag.

    The text is split with ``tokenize`` and tagged with ``st.tag``; each
    tagged entry is read as ``(token, tag)``.  Consecutive tokens that share
    the same non-``'O'`` tag are joined with single spaces into one phrase.

    Every character in ``string.punctuation`` is removed from a token before
    it is stored, so a token consisting only of punctuation contributes an
    empty string to its phrase.

    Args:
        text: Input passed unchanged to ``tokenize``.

    Returns:
        A ``defaultdict(list)`` mapping each tag to the list of its phrases,
        in the order they occur in the text.  Tokens tagged ``'O'`` are
        omitted.
    """
    tagged = st.tag(tokenize(text))
    punctuation = frozenset(string.punctuation)
    entities = defaultdict(list)

    # Tag of the previous token; 'O' means "not inside an entity".
    previous_tag = 'O'
    for entry in tagged:
        # Indexed rather than unpacked so entries carrying extra fields are
        # still accepted.
        token, tag = entry[0], entry[1]
        if tag != 'O':
            # Filter characters instead of using ``string.maketrans`` with the
            # two-argument ``str.translate``: that form exists only on
            # Python 2, while this one yields the same result on 2 and 3.
            cleaned = "".join(
                ch for ch in str(token) if ch not in punctuation
            )
            if tag != previous_tag:
                # Previous token was 'O' or a different class: new phrase.
                entities[tag].append(cleaned)
            else:
                # Same class as the previous token: extend the last phrase.
                entities[tag][-1] = entities[tag][-1] + " " + cleaned
        previous_tag = tag
    return entities