# Demo driver: run every extractor over each sample text and pretty-print
# the result under its own banner.
#
# Each entry pairs a banner with a callable that produces the value to
# pretty-print. Lambdas are used so the extractor names are only looked up
# when a section is actually run, in the same order as the banners appear.
# The NER extractors' results are converted with dict() before printing;
# the noun-phrase extractors' results are printed as returned.
_DEMO_SECTIONS = (
    ('###### Noun Phrases: ML Extractor #####',
     lambda text: ml_np_extract(text)),
    ('###### Noun Phrases: RegEx Extractor #####',
     lambda text: regex_np_extract(text)),
    ('###### NLTK NER #####',
     lambda text: dict(nltk_extract_ner(text))),
    ('###### Stanford NER #####',
     lambda text: dict(stanford_extract_ner(text))),
    ('###### MIT_IE NER #####',
     lambda text: dict(mitie_extract_ner(text))),
    ('###### AlchemyAPI NER #####',
     lambda text: dict(alchemyapi_extract_ner(text))),
)

for demo in demo_text:
    print('Processing text: ', demo)
    print('')
    for _banner, _produce in _DEMO_SECTIONS:
        print(_banner)
        pprint(_produce(demo))
        print('')
# Run the NER extractors over the English summaries of (at most) the first
# ten articles stored in the sample JSON file, which is resolved relative to
# the current working directory.
path = os.path.join(os.getcwd(), "data-samples/random-article-for-category-News.json")
with open(path) as data_file:
    data = json.load(data_file)

articles = data["hits"]["hits"]

# Slice rather than index with range(10): a file holding fewer than ten hits
# is processed in full instead of raising IndexError.
for i, article in enumerate(articles[:10]):
    print("************* NUM : %d **************" % i)
    line = article["_source"]["summary_en"]

    # Only decode objects that actually provide .decode(). A Python 3 str
    # has no such method, so the previous unconditional call raised
    # AttributeError there; such text is now used as-is.
    if hasattr(line, "decode"):
        try:
            line = line.decode("utf-8", "ignore")
        except UnicodeEncodeError:
            # Same best-effort behaviour as before: skip summaries whose
            # decode attempt fails with this error.
            continue

    print("Processing text: ", line)
    print("")

    print("###### NLTK NER #####")
    print(nltk_extract_ner(line))
    print("")

    print("###### Stanford NER #####")
    print(stanford_extract_ner(line))
    print("")

    print("###### AlchemyAPI NER #####")
    print(alchemyapi_extract_ner(line))
    print("")