def get_contents_key(self, div):
    """Return the text used as the contents key for one article entry.

    When ``self.contents_key`` is ``'read-time'`` the key is taken from the
    entry's ``reading-time`` anchor.  For any other setting, the text of the
    entry's ``tool link`` anchor is prefixed with ``http://``, handed to
    ``self.browser.index_to_soup`` and the ``<title>`` element of the
    resulting document supplies the key.

    In both cases the selected node is converted with
    ``BasicNewsRecipe.tag_to_string`` before being returned.
    """
    if self.contents_key == 'read-time':
        return BasicNewsRecipe.tag_to_string(
            div.find('a', attrs={'class': ['reading-time']}))

    # Any other mode: resolve the linked page and use its <title>.
    source_link = div.find('a', attrs={'class': ['tool link']})
    address = 'http://' + BasicNewsRecipe.tag_to_string(source_link)
    page = self.browser.index_to_soup(address)
    return BasicNewsRecipe.tag_to_string(page.find('title'))
def extract_info(self, div):
    """Build the article record for one index entry.

    The first anchor carrying an ``href`` supplies the title and, appended
    to ``self.base_url``, the article URL.  The text of the entry's first
    ``<p>`` is used as the description; when there is no such paragraph the
    URL is used instead.  The date is today's date formatted as
    ``'%a, %d %b'``.

    Returns a dict with the keys ``title``, ``url``, ``date``,
    ``description`` and ``content`` (always an empty string), or ``None``
    when the entry has no anchor with an ``href``.
    """
    link = div.find('a', href=True)
    if not link:
        return None

    url = self.base_url + link['href']
    title = BasicNewsRecipe.tag_to_string(link, use_alt=False)
    pubdate = strftime('%a, %d %b')
    summary = div.find('p')
    description = (
        BasicNewsRecipe.tag_to_string(summary, use_alt=False)
        if summary else url
    )
    return {
        'title': title,
        'url': url,
        'date': pubdate,
        'description': description,
        'content': '',
    }
def get_contents_key(self, div):
    """Pick the node that identifies an article and return it as text.

    With ``self.contents_key`` set to ``'read-time'`` the node is the
    entry's ``reading-time`` anchor.  Otherwise the text of the ``tool link``
    anchor, prefixed with ``http://``, is passed to
    ``self.browser.index_to_soup`` and the ``<title>`` element of the
    returned document is the node.

    The node is converted with ``BasicNewsRecipe.tag_to_string``.
    """
    if self.contents_key != 'read-time':
        link_text = BasicNewsRecipe.tag_to_string(
            div.find('a', attrs={'class': ['tool link']}))
        page_url = 'http://' + link_text
        linked_page = self.browser.index_to_soup(page_url)
        node = linked_page.find('title')
    else:
        node = div.find('a', attrs={'class': ['reading-time']})
    return BasicNewsRecipe.tag_to_string(node)
def get_contents_key(self, div):
    """Return the contents key for one article entry.

    The key depends on ``self.contents_key``:

    * ``'read-time'`` -- the entry's ``reading-time`` anchor.
    * ``'title-and-read-time'`` -- the first child of the entry's first
      anchor with trailing newlines stripped, followed by the first child
      of the ``reading-time`` anchor in parentheses (literal ``<span>`` and
      ``</span>`` markers removed).
    * anything else -- the network location of the URL formed by prefixing
      ``http://`` to the text of the ``tool link`` anchor.

    The chosen value is passed through ``BasicNewsRecipe.tag_to_string``.
    """
    if self.contents_key == 'read-time':
        return BasicNewsRecipe.tag_to_string(
            div.find('a', attrs={'class': ['reading-time']}))

    if self.contents_key == 'title-and-read-time':
        time_node = div.find('a', attrs={'class': ['reading-time']}).contents[0]
        # The node is stringified as markup, so drop the wrapping span tags.
        time_text = str(time_node).replace('<span>', '').replace('</span>', '')
        reading_time = ' (' + time_text + ')'
        title_text = div.find('a').contents[0].rstrip('\n')
        return BasicNewsRecipe.tag_to_string(title_text + reading_time)

    source_link = div.find('a', attrs={'class': ['tool link']})
    url = 'http://' + BasicNewsRecipe.tag_to_string(source_link)
    parsed = urlparse(url)
    return BasicNewsRecipe.tag_to_string('{uri.netloc}'.format(uri=parsed))
def extract_info(self, div):
    """Turn one index entry into an article dict.

    Uses the entry's first anchor that has an ``href``: its text becomes the
    title and ``self.base_url`` plus the ``href`` becomes the URL.  The
    description starts out as the URL and is replaced by the text of the
    entry's first ``<p>`` when one exists.  ``date`` is the current date
    formatted with ``'%a, %d %b'`` and ``content`` is always ``''``.

    Returns ``None`` when the entry contains no anchor with an ``href``.
    """
    anchor = div.find('a', href=True)
    if not anchor:
        return None

    article_url = self.base_url + anchor['href']
    heading = BasicNewsRecipe.tag_to_string(anchor, use_alt=False)
    today = strftime('%a, %d %b')

    # Start with the URL as the description; a summary paragraph wins.
    article = dict(title=heading, url=article_url, date=today,
                   description=article_url, content='')
    paragraph = div.find('p')
    if paragraph:
        article['description'] = BasicNewsRecipe.tag_to_string(
            paragraph, use_alt=False)
    return article
def get_contents_key(self, div):
    """Compute the key that identifies an article entry.

    ``self.contents_key`` selects the source of the key:

    * ``'read-time'``: the ``reading-time`` anchor of the entry.
    * ``'title-and-read-time'``: the first child of the entry's first anchor
      (trailing newlines removed) plus, in parentheses, the first child of
      the ``reading-time`` anchor with ``<span>``/``</span>`` removed.
    * otherwise: the ``netloc`` part of ``'http://'`` followed by the text
      of the ``tool link`` anchor.

    Whatever was selected is converted with
    ``BasicNewsRecipe.tag_to_string`` and returned.
    """
    reading_time_attrs = {'class': ['reading-time']}

    if self.contents_key == 'read-time':
        key = div.find('a', attrs=reading_time_attrs)
    elif self.contents_key == 'title-and-read-time':
        duration = str(div.find('a', attrs=reading_time_attrs).contents[0])
        # Remove the literal span markers left by stringifying the node.
        for marker in ('<span>', '</span>'):
            duration = duration.replace(marker, '')
        suffix = ' (' + duration + ')'
        heading = div.find('a').contents[0].rstrip('\n')
        key = heading + suffix
    else:
        link_text = BasicNewsRecipe.tag_to_string(
            div.find('a', attrs={'class': ['tool link']}))
        location = urlparse('http://' + link_text)
        key = '{uri.netloc}'.format(uri=location)

    return BasicNewsRecipe.tag_to_string(key)
def __init__(self, indexPage):
    """Record how many articles an index page reports.

    The ``div`` with class ``nb-results`` is looked up in ``indexPage`` and
    converted to text with ``BasicNewsRecipe.tag_to_string``.  The first run
    of digits in that text is stored as ``self.articles_number``; when no
    digits (or no text) are available the value falls back to ``1``.

    :param indexPage: parsed index page exposing a ``find`` method
        (presumably a BeautifulSoup document -- confirm against the caller).
    """
    nb_results = BasicNewsRecipe.tag_to_string(
        indexPage.find('div', attrs={'class': 'nb-results'}))

    # Assign the fallback up front so the attribute exists on every path;
    # previously it was left undefined whenever the guard below failed.
    self.articles_number = 1
    if nb_results is not None:
        numbers = re.findall(r'\d+', nb_results)
        if numbers:
            self.articles_number = int(numbers[0])