def addNewsArticle(companyModel, newsSourceModel, url):
    """Scrape the article at *url* and persist it as a News row.

    companyModel / newsSourceModel are the related model rows the new
    News record is linked to.  Assumes the page has an <h1> headline,
    an #articleText container, and an #articleInfo timestamp -- raises
    IndexError if any of those xpaths match nothing.
    """
    tree = html.fromstring(requests.get(url).text)
    # First <h1> text node is taken as the headline.
    title = tree.xpath('//h1/text()')[0]
    # Join every text fragment under #articleText in one pass
    # (the original += loop was quadratic in the number of fragments).
    text = "".join(tree.xpath("//*[@id='articleText']//*/text()"))
    # Timestamp looks like "Mon Jan 02, 2017 3:04pm EST"; the [:-4]
    # slice drops the 4-char timezone suffix strptime cannot parse.
    dateText = tree.xpath("//div[@id='articleInfo']//span[@class='timestamp']/text()")[0]
    date = datetime.datetime.strptime(dateText[:-4], "%a %b %d, %Y %I:%M%p").date()
    News.create(
        company=companyModel,
        newsSource=newsSourceModel,
        title=title,
        text=text,
        url=url,
        date=date,
    )
    # Parenthesized print works identically under Python 2 and 3
    # (the original bare print statement was Python-2-only syntax).
    print("Added: " + title)
def post(self, *args):
    """Handle the news-creation form: save a News row, then redirect.

    args[0] is the category id captured from the URL route; the item
    fields come from the posted form arguments.
    """
    category_id = args[0]
    # Pull the form fields in the same order the handler always has,
    # so any missing-argument error surfaces for the same field first.
    item_title = self.get_argument("news-title")
    item_body = self.get_argument("news-body")
    item_date = self.get_argument("news-date")
    author_id = self.GetAuthorIdByEmail(self.current_user)
    News.create(
        title=item_title,
        body=item_body,
        date=item_date,
        author=author_id,
        category=category_id,
    )
    self.write("Item saved...")
    self.redirect("/category")
def get_news():
    """Search Google for each configured keyword and collect fresh hits.

    For every keyword in the module-level ``keywords`` list, fetches one
    randomly-offset results page, extracts up to three results, stores
    any not-yet-seen reference via ``News.create``, and returns the
    collected hits as JSON sorted by keyword frequency (descending).
    """
    results = []
    for keyword in keywords:
        # Random paging offset so repeated runs sample different pages.
        start = random.randrange(0, 180, 10)
        URL = "https://google.com/search?q=" + keyword + "&start=" + str(start)
        search = requests.get(URL, headers=HEADERS)
        # Guard clause: skip keywords whose search did not return 200.
        if search.status_code != 200:
            continue
        response = BeautifulSoup(search.content, "html.parser")
        # Google wraps each organic result in a div.rc container.
        for hit in response.find_all('div', class_='rc', limit=3):
            anchors = hit.find_all('a')
            if not anchors:
                continue
            reference = anchors[0]['href']
            # Skip references already stored in the database.
            if News.query.filter_by(reference=reference).first():
                continue
            title_tag = hit.find('h3')
            excerpt_tag = hit.find('span', class_='st')
            # BUGFIX: the original dereferenced .text unconditionally and
            # crashed with AttributeError whenever Google omitted the
            # <h3> headline or the span.st excerpt for a result.
            if title_tag is None or excerpt_tag is None:
                continue
            news = {
                "keyword": keyword,
                "title": title_tag.text,
                "reference": reference,
                "excerpt": excerpt_tag.text,
            }
            # Lowercase title + excerpt so the frequency count is
            # case-insensitive.
            full_text = news['excerpt'].lower() + news['title'].lower()
            FreqDistBody = FreqDist(full_text.split(" "))
            news['frequency'] = FreqDistBody[keyword]
            results.append(news)
            # Persist the hit; the return value is unused (the original
            # bound it to ``new``, shadowing the loop variable).
            News.create(news['title'], news['excerpt'],
                        news['reference'], news['keyword'],
                        news['frequency'])
    return jsonify(sorted(results, key=lambda k: k['frequency'], reverse=True))