Ejemplo n.º 1
0
    def parse(self, reg, range_postfix):
        """Collect matches of ``reg`` over a range of pages and return them as URLs.

        Args:
            reg: Pattern handed unchanged to ``Text.findall`` for every page.
            range_postfix: Iterable of exactly two values ``(start, end)``;
                pages are visited for ``range(start, end)``, so ``end`` is
                excluded.

        Returns:
            Whatever ``self.url.concat_site_with_bodies`` produces for the
            accumulated matches (presumably absolute URLs -- confirm against
            that helper).
        """
        first, last = tuple(range_postfix)
        # Progress bar is labelled with this object's name.
        pages = tqdm(range(first, last), desc=f'{self.name}')
        # For each page index: build its link, load it as a Text, and keep
        # every match of the pattern, flattened into one list.
        matches = [
            hit
            for page in pages
            for hit in Text.create_by_url(self.url.link(page)).findall(reg)
        ]
        return self.url.concat_site_with_bodies(matches)
Ejemplo n.º 2
0
 def __init__(self, site, url):
     """Store the site and URL, then build the page text for that URL.

     Args:
         site: Stored unchanged as ``self.site``.
         url: Stored unchanged as ``self.url`` and passed to
             ``Text.create_by_url`` to populate ``self.text``.
     """
     self.site = site
     self.url = url
     # Created last so both plain attributes exist before the Text is built.
     self.text = Text.create_by_url(url)
Ejemplo n.º 3
0
 def urls(self):
     """Return the media-preview links captured from this object's page.

     Builds a ``Text`` from ``self.url.url``, captures the ``href`` value of
     each ``media-preview_img-wrap`` anchor nested inside a
     ``content-list_item`` block, and passes the captures through
     ``self.url.concat_site_with_bodies`` (presumably to prefix the site --
     confirm against that helper).
     """
     # The single capture group is the anchor's href; the data-reactid
     # attribute values are matched lazily and discarded.
     pattern = r'<div class="content-list_item" data-reactid=".*?"><div class="content-list_item-info" data-reactid=".*?"><div class="media-preview" data-reactid=".*?"><a class="media-preview_img-wrap" href="(.*?)"'
     page = Text.create_by_url(self.url.url)
     return self.url.concat_site_with_bodies(page.findall(pattern))