예제 #1
0
    def get_contents_key(self, div):
        """Return the key string for an article element.

        When ``self.contents_key`` is ``'read-time'`` the key is the text of
        the ``reading-time`` link found in ``div``. For any other setting the
        text of the ``tool link`` anchor is prefixed with ``http://``, that
        address is loaded through ``self.browser.index_to_soup`` and the text
        of the loaded page's ``<title>`` element is returned.
        """
        if self.contents_key == 'read-time':
            read_time_link = div.find('a', attrs={'class': ['reading-time']})
            return BasicNewsRecipe.tag_to_string(read_time_link)

        # Any other mode: resolve the key from the linked page's title.
        source_link = div.find('a', attrs={'class': ['tool link']})
        page_url = 'http://' + BasicNewsRecipe.tag_to_string(source_link)
        page = self.browser.index_to_soup(page_url)
        return BasicNewsRecipe.tag_to_string(page.find('title'))
예제 #2
0
 def extract_info(self, div):
     """Build an article dict from the first link with an ``href`` in ``div``.

     Returns ``None`` when ``div`` contains no such link. Otherwise the
     returned dict has these keys:

     * ``title``: text of the link.
     * ``url``: ``self.base_url`` followed by the link's ``href``.
     * ``date``: result of ``strftime('%a, %d %b')``.
     * ``description``: text of the first ``<p>`` in ``div``, or the URL
       when there is no paragraph.
     * ``content``: always an empty string.
     """
     link = div.find('a', href=True)
     if not link:
         return None

     article_url = self.base_url + link['href']
     headline = BasicNewsRecipe.tag_to_string(link, use_alt=False)
     stamp = strftime('%a, %d %b')

     # Fall back to the URL when the element carries no summary paragraph.
     paragraph = div.find('p')
     blurb = (BasicNewsRecipe.tag_to_string(paragraph, use_alt=False)
              if paragraph else article_url)

     return {
         'title': headline,
         'url': article_url,
         'date': stamp,
         'description': blurb,
         'content': '',
     }
예제 #3
0
    def get_contents_key(self, div):
        """Compute the key string for the article element ``div``.

        The ``'read-time'`` mode of ``self.contents_key`` uses the text of the
        ``reading-time`` link. Every other mode fetches the page whose address
        is ``http://`` plus the ``tool link`` anchor text and uses the text of
        that page's ``<title>`` element.
        """
        wants_read_time = self.contents_key == 'read-time'

        if not wants_read_time:
            link_text = BasicNewsRecipe.tag_to_string(
                div.find('a', attrs={'class': ['tool link']}))
            document = self.browser.index_to_soup('http://' + link_text)
            key_tag = document.find('title')
        else:
            key_tag = div.find('a', attrs={'class': ['reading-time']})

        return BasicNewsRecipe.tag_to_string(key_tag)
예제 #4
0
    def get_contents_key(self, div):
        """Return the key string for an article element.

        The key depends on ``self.contents_key``:

        * ``'read-time'``: text of the ``reading-time`` link in ``div``.
        * ``'title-and-read-time'``: first child of the first link in ``div``
          (trailing newlines stripped) followed by the reading time in
          parentheses.
        * anything else: network location of the URL formed by prefixing
          ``http://`` to the ``tool link`` anchor text.
        """
        if self.contents_key == 'read-time':
            key_tag = div.find('a', attrs={'class': ['reading-time']})
        elif self.contents_key == 'title-and-read-time':
            # The first child of the reading-time link is stringified and any
            # literal <span> wrapper is removed so only its text remains.
            read_time_link = div.find('a', attrs={'class': ['reading-time']})
            reading_time = ' (' + str(read_time_link.contents[0]).replace(
                '<span>', '').replace('</span>', '') + ')'
            key_tag = div.find('a').contents[0].rstrip('\n') + reading_time
        else:
            url = 'http://' + BasicNewsRecipe.tag_to_string(
                div.find('a', attrs={'class': ['tool link']}))
            key_tag = '{uri.netloc}'.format(uri=urlparse(url))

        # key_tag is a tag in the first branch and already a string in the
        # other two; all of them are passed through tag_to_string.
        return BasicNewsRecipe.tag_to_string(key_tag)
예제 #5
0
 def extract_info(self, div):
     """Describe the article linked from ``div`` as a dict.

     The first ``<a>`` with an ``href`` supplies the title (its text) and the
     URL (``self.base_url`` plus the ``href``). ``date`` is the result of
     ``strftime('%a, %d %b')`` and ``content`` is an empty string.
     ``description`` is the text of the first ``<p>`` in ``div`` when one
     exists and the URL otherwise. ``None`` is returned when ``div`` has no
     link with an ``href``.
     """
     anchor = div.find('a', href=True)
     if not anchor:
         return None

     target = self.base_url + anchor['href']
     info = {
         'title': BasicNewsRecipe.tag_to_string(anchor, use_alt=False),
         'url': target,
         'date': strftime('%a, %d %b'),
         # Default description; replaced below when a paragraph is present.
         'description': target,
         'content': '',
     }

     paragraph = div.find('p')
     if paragraph:
         info['description'] = BasicNewsRecipe.tag_to_string(
             paragraph, use_alt=False)
     return info
예제 #6
0
    def get_contents_key(self, div):
        """Return the key string for ``div`` as selected by ``self.contents_key``.

        * ``'read-time'``: text of the ``reading-time`` link.
        * ``'title-and-read-time'``: first child of the first link (trailing
          newlines stripped) plus the reading time wrapped in parentheses.
        * otherwise: network location of ``http://`` + ``tool link`` text.
        """
        if self.contents_key == 'read-time':
            read_time_link = div.find('a', attrs={'class': ['reading-time']})
            return BasicNewsRecipe.tag_to_string(read_time_link)

        if self.contents_key == 'title-and-read-time':
            # Stringify the first child of the reading-time link and drop any
            # literal <span> wrapper around it.
            time_markup = str(
                div.find('a', attrs={'class': ['reading-time']}).contents[0])
            time_text = time_markup.replace('<span>', '').replace('</span>', '')
            suffix = ' (' + time_text + ')'
            heading = div.find('a').contents[0].rstrip('\n')
            return BasicNewsRecipe.tag_to_string(heading + suffix)

        tool_link = div.find('a', attrs={'class': ['tool link']})
        address = 'http://' + BasicNewsRecipe.tag_to_string(tool_link)
        netloc = '{uri.netloc}'.format(uri=urlparse(address))
        return BasicNewsRecipe.tag_to_string(netloc)
예제 #7
0
 def __init__(self, indexPage):
     """Read the article count from the ``nb-results`` element of a page.

     The text of the ``div`` with class ``nb-results`` in ``indexPage`` is
     scanned for runs of digits. The first run becomes
     ``self.articles_number``; ``1`` is used when the text has no digits.
     """
     results_tag = indexPage.find('div', attrs={'class': 'nb-results'})
     nb_results = BasicNewsRecipe.tag_to_string(results_tag)
     # None is a singleton, so test it by identity rather than equality.
     if nb_results is not None:
         numbers_in_result = re.findall(r'\d+', nb_results)
         self.articles_number = int(numbers_in_result[0]) if numbers_in_result else 1