def parse_add(self, url, response_body):
    """Build an article from already-downloaded HTML, filter its entities, and store it.

    Args:
        url: Source URL of the article (recorded with the article; not fetched).
        response_body: Raw HTML of the page, already downloaded by the caller.

    Returns:
        The article object created by self.add_article().
    """
    article = newspaper.Article("")
    # Mark as downloaded so parse() accepts the HTML we inject directly
    # instead of trying to fetch the (empty) URL itself.
    article.is_downloaded = True
    article.html = response_body
    article.parse()
    article.nlp()
    img_link = article.top_img
    named, persons, orgs = self.get_named_entities(article.text)
    print("alma base search started")
    # Keep only entities that the alma knowledge base recognises
    # (alma.search presumably returns a hit count — TODO confirm).
    persons = [person for person in persons if alma.search(person) > 0]
    orgs = [org for org in orgs if alma.search(org) > 0]
    print("alma base search ended")
    author = "default"
    try:
        # newspaper exposes the byline as `authors` (a list); the original
        # `a.author` raised AttributeError on every call, so the fallback
        # always fired and the real author was never stored.
        author = article.authors[0]
    except (AttributeError, IndexError):
        # No byline extracted; keep the "default" placeholder.
        print("Not found")
    art = self.add_article(article.title, article.summary, url, author,
                           img_link, named, persons, orgs)
    # test_keywords(art,newsClassifier)
    return art
def train(url, keywords):
    """Download and parse the article at `url`, filter its entities, and store it.

    NOTE(review): an identical `train` is defined again immediately below and
    shadows this one — consider removing one of the two.

    Args:
        url: URL of the article to download and parse.
        keywords: Passed through unchanged (the training hook below is disabled).

    Returns:
        The `keywords` argument, unchanged.
    """
    article = newspaper.Article(url)
    article.download()
    article.parse()
    article.nlp()
    img_link = article.top_img
    named, persons, orgs = get_named_entities(article.text)
    print("alma base search started")
    # Keep only entities that the alma knowledge base recognises
    # (alma.search presumably returns a hit count — TODO confirm).
    persons = [person for person in persons if alma.search(person) > 0]
    orgs = [org for org in orgs if alma.search(org) > 0]
    # Typo fixed ("stoped" -> "ended") to match the sibling functions' message.
    print("alma base search ended")
    author = "default"
    try:
        # newspaper exposes the byline as `authors` (a list); the original
        # `a.author` raised AttributeError on every call, so the fallback
        # always fired and the real author was never stored.
        author = article.authors[0]
    except (AttributeError, IndexError):
        # No byline extracted; keep the "default" placeholder.
        print("Not found")
    art = add_article(article.title, article.summary, url, author,
                      img_link, named, persons, orgs)
    # test_keywords(art,newsClassifier)
    return keywords
def train(url, keywords):
    """Download and parse the article at `url`, filter its entities, and store it.

    NOTE(review): this duplicates the `train` defined immediately above and
    shadows it — consider removing one of the two.

    Args:
        url: URL of the article to download and parse.
        keywords: Passed through unchanged (the training hook below is disabled).

    Returns:
        The `keywords` argument, unchanged.
    """
    article = newspaper.Article(url)
    article.download()
    article.parse()
    article.nlp()
    img_link = article.top_img
    named, persons, orgs = get_named_entities(article.text)
    print("alma base search started")
    # Keep only entities that the alma knowledge base recognises
    # (alma.search presumably returns a hit count — TODO confirm).
    persons = [person for person in persons if alma.search(person) > 0]
    orgs = [org for org in orgs if alma.search(org) > 0]
    print("alma base search ended")
    author = "default"
    try:
        # newspaper exposes the byline as `authors` (a list); the original
        # `a.author` raised AttributeError on every call, so the fallback
        # always fired and the real author was never stored.
        author = article.authors[0]
    except (AttributeError, IndexError):
        # No byline extracted; keep the "default" placeholder.
        print("Not found")
    art = add_article(article.title, article.summary, url, author,
                      img_link, named, persons, orgs)
    # test_keywords(art,newsClassifier)
    return keywords