Esempio n. 1
0
    def sync(obj: Base, *args, **kwargs):
        """Register one text task per ``.entry-title`` link on the base page.

        Fetches ``obj.sync_type.base_url`` with ``Internet.html_get``, selects
        the elements matching ``.entry-title`` and walks them in reverse
        document order. For each one, the first ``<a>`` child supplies the
        ``href`` (used as both ``unique_key`` and ``url``) and the stripped
        link text (used as ``name``) passed to ``obj.add_text_task``.

        Title elements that contain no anchor, or whose anchor has no
        ``href``, are skipped so that one malformed entry does not abort the
        whole sync.

        ``*args`` and ``**kwargs`` are accepted but not used.
        """
        r = Internet.html_get(obj.sync_type.base_url)
        titles = r.html.find(".entry-title")

        # Reverse document order; presumably so older entries are added first.
        for h2 in titles[::-1]:
            a = h2.find("a", first=True)
            if a is None:
                # No link inside this title element: nothing to register.
                continue

            url = a.attrs.get('href')
            if not url:
                # Without an href there is no usable unique key / URL.
                continue

            name = a.text.strip()

            obj.add_text_task(unique_key=url, name=name, url=url, data={})
Esempio n. 2
0
    def sync(obj: Base, *args, **kwargs):
        """Register one text task per article link on the formatted listing URL.

        The listing URL is ``obj.sync_type.base_url`` formatted with the
        ``cat`` and ``typ`` keyword arguments (each defaults to ``""``).
        The page is fetched with ``Internet.html_get`` and the anchors matched
        by the XPath below are walked in reverse document order. Each anchor's
        ``href`` with its query string removed is passed to
        ``obj.add_text_task`` as both ``unique_key`` and ``url``; the stripped
        link text is passed as ``name``.

        Anchors without a usable ``href`` are skipped; previously a missing
        ``href`` raised ``AttributeError`` and aborted the sync.

        ``*args`` is accepted but not used.
        """
        cat = kwargs.get("cat", "")
        typ = kwargs.get("typ", "")
        url = obj.sync_type.base_url.format(cat=cat, typ=typ)

        res = Internet.html_get(url)
        links = res.html.xpath("/html/body/main/section/article[*]/h2/a")

        # Reverse document order; presumably so older entries are added first.
        for a in links[::-1]:
            # Drop the query string so the same article always maps to the
            # same key regardless of tracking parameters.
            link = (a.attrs.get("href") or "").split("?")[0]
            if not link:
                continue

            name = a.text.strip()

            obj.add_text_task(unique_key=link, name=name, url=link, data={})
Esempio n. 3
0
    def sync(obj: Base, *args, **kwargs):
        """Register one text task per ``.post-card-title`` link on the base page.

        Fetches ``obj.sync_type.base_url`` with ``Internet.html_get`` and walks
        the elements matching ``.post-card-title`` in reverse document order.
        The first ``<a>`` child of each element provides the ``href``, which is
        resolved against the base URL and passed to ``obj.add_text_task`` as
        both ``unique_key`` and ``url``; the stripped link text is passed as
        ``name``. The name and resolved link are also printed to stdout.

        Elements without an anchor, or whose anchor has no ``href``, are
        skipped. Previously a missing anchor raised ``AttributeError`` and a
        missing ``href`` registered a task pointing at the base URL itself
        (``urljoin`` returns the base when the second argument is empty).

        ``*args`` and ``**kwargs`` are accepted but not used.
        """
        base_url = obj.sync_type.base_url
        res = Internet.html_get(base_url)
        # Reverse document order; presumably so older entries are added first.
        h2_list = res.html.find(".post-card-title")[::-1]

        for h2 in h2_list:
            a = h2.find("a", first=True)
            if a is None:
                continue

            href = a.attrs.get('href')
            if not href:
                continue

            link = urllib.parse.urljoin(base_url, href)
            name = a.text.strip()
            print(name, link)
            obj.add_text_task(unique_key=link,
                              name=name,
                              url=link,
                              data={})
Esempio n. 4
0
async def test_url(url_test: UrlTest):
    """
    Test given URL.

    Fetches ``url_test.url`` through ``Internet.html_get`` and returns a
    ``UrlTestOut`` built from the URL and the response's status code,
    content and headers. If any step raises, the exception's string form
    is returned instead.
    """
    try:
        response = Internet.html_get(url_test.url)
        return UrlTestOut(
            url=url_test.url,
            status_code=response.status_code,
            body=response.content,
            headers=response.headers,
        )
    except Exception as exc:
        # Failures are reported to the caller as text rather than raised.
        return str(exc)
Esempio n. 5
0
    def sync(obj: Base, *args, **kwargs):
        """Register one text task per anchor matched by a fixed XPath.

        Fetches ``obj.sync_type.base_url`` with ``Internet.html_get`` and walks
        the matched anchors in reverse document order. Each anchor's ``href``
        with its query string removed is passed to ``obj.add_text_task`` as
        ``unique_key`` and ``url`` and is also stored under ``data["text"]``;
        the stripped link text is passed as ``name``.

        Anchors without a usable ``href`` are skipped; previously a missing
        ``href`` raised ``AttributeError`` and aborted the sync.

        ``*args`` and ``**kwargs`` are accepted but not used.
        """
        r = Internet.html_get(obj.sync_type.base_url)
        links = r.html.xpath(
            '/html/body/div[1]/div[2]/div/div[4]/div[2]/section/div[*]/div[*]/div[2]/a'
        )

        # Reverse document order; presumably so older entries are added first.
        for a in links[::-1]:
            # Strip the query string so the key is stable across visits.
            url = (a.attrs.get('href') or "").split("?")[0]
            if not url:
                continue

            name = a.text.strip()

            obj.add_text_task(unique_key=url,
                              name=name,
                              url=url,
                              data=dict(text=url))
Esempio n. 6
0
    def sync(obj: Base, *args, **kwargs):
        """Register one text task per heading link found on the base page.

        The page at ``obj.sync_type.base_url`` is fetched with
        ``Internet.html_get``; anchors matched by the XPath below are visited
        last-to-first. Each ``href`` is resolved against the base URL and the
        result is handed to ``obj.add_text_task`` as ``unique_key``, ``url``
        and ``data["text"]``, with the stripped anchor text as ``name``.

        ``*args`` and ``**kwargs`` are accepted but not used.
        """
        page = Internet.html_get(obj.sync_type.base_url)
        anchors = page.html.xpath(
            "/html/body/form/div[4]/div[3]/div/div[1]/div[*]/div/div[1]/h3/a")

        for anchor in reversed(anchors):
            # hrefs may be relative, so resolve them against the base URL.
            target = urllib.parse.urljoin(obj.sync_type.base_url,
                                          anchor.attrs.get('href'))
            title = anchor.text.strip()

            obj.add_text_task(
                unique_key=target,
                name=title,
                url=target,
                data={"text": target},
            )
Esempio n. 7
0
    def sync(obj: Base, *args, **kwargs):
        """Register one text task per ``.crayons-story__title`` link.

        The listing URL is ``obj.sync_type.base_url`` joined with the ``cat``
        keyword argument (default ``""``). The page is fetched with
        ``Internet.html_get`` and the matching title elements are visited
        last-to-first. For each, the first ``<a>`` child supplies:

        * ``unique_key`` - the anchor's stripped ``id`` attribute,
        * ``name`` - the stripped anchor text,
        * ``url`` and ``data["text"]`` - the ``href`` resolved against the
          base URL.

        ``*args`` is accepted but not used.
        """
        listing_url = urllib.parse.urljoin(obj.sync_type.base_url,
                                           kwargs.get("cat", ""))
        page = Internet.html_get(listing_url)
        titles = page.html.find(".crayons-story__title")

        for title in reversed(titles):
            anchor = title.find('a', first=True)

            # Resolve the (possibly relative) href against the site base URL.
            article_url = urllib.parse.urljoin(obj.sync_type.base_url,
                                               anchor.attrs.get('href'))
            task_key = anchor.attrs.get('id').strip()
            task_name = anchor.text.strip()

            obj.add_text_task(unique_key=task_key,
                              name=task_name,
                              url=article_url,
                              data={"text": article_url})
Esempio n. 8
0
    def sync(obj: Base, *args, **kwargs):
        """Register one text task per anchor matched by any configured XPath.

        Fetches ``obj.sync_type.base_url`` with ``Internet.html_get`` and
        evaluates every XPath listed under ``obj.sync_type.extras["xp"]``,
        concatenating the matches in configuration order. The combined list is
        walked in reverse; each anchor's ``href`` with its query string removed
        is passed to ``obj.add_text_task`` as ``unique_key`` and ``url``, and
        the stripped link text as ``name``.

        A missing or empty ``"xp"`` entry yields no tasks (previously a
        missing entry raised ``TypeError``), and anchors without a usable
        ``href`` are skipped (previously ``AttributeError``).

        ``*args`` and ``**kwargs`` are accepted but not used.
        """
        r = Internet.html_get(obj.sync_type.base_url)

        # ``extras.get`` returns None when "xp" is not configured.
        xpaths = obj.sync_type.extras.get("xp") or []

        found_links = []
        for xpath in xpaths:
            # ``or []`` keeps extend() safe should a query return None.
            found_links.extend(r.html.xpath(xpath) or [])

        # Reverse order; presumably so older entries are added first.
        for a in found_links[::-1]:
            url = (a.attrs.get('href') or "").split("?")[0]
            if not url:
                continue

            name = a.text.strip()

            obj.add_text_task(unique_key=url, name=name, url=url, data={})
Esempio n. 9
0
    def sync(obj: Base, *args, **kwargs):
        """Register text tasks from the first configured XPath that matches.

        The page at ``obj.sync_type.base_url`` is fetched with
        ``Internet.html_get``. The XPaths stored under
        ``obj.sync_type.extras["xp"]`` are tried in order and evaluation stops
        at the first one that returns a non-empty result. The matched anchors
        are visited last-to-first; each ``href`` is resolved against
        ``obj.sync_type.extras["article_url"]`` and passed to
        ``obj.add_text_task`` as ``unique_key`` and ``url``, with the stripped
        anchor text as ``name``.

        ``*args`` and ``**kwargs`` are accepted but not used.
        """
        page = Internet.html_get(obj.sync_type.base_url)

        anchors = []
        for expression in obj.sync_type.extras.get("xp"):
            anchors = page.html.xpath(expression)
            if anchors:
                # First expression with matches wins; later ones are fallbacks.
                break

        article_root = obj.sync_type.extras.get("article_url")

        for anchor in reversed(anchors):
            target = urllib.parse.urljoin(article_root, anchor.attrs.get('href'))
            title = anchor.text.strip()

            obj.add_text_task(
                unique_key=target,
                name=title,
                url=target,
                data={},
            )
Esempio n. 10
0
    def sync(obj: Base, *args, **kwargs):
        """Register text tasks from the first page-layout XPath that matches.

        The listing URL is ``obj.sync_type.base_url`` formatted with the
        ``cat`` keyword argument (default ``""``) and is fetched with
        ``Internet.html_get``. Two hard-coded XPaths are tried in order and
        the first non-empty result is used. The matched anchors are visited
        last-to-first; each ``href`` is resolved against
        ``obj.sync_type.extras["base_url"]`` and passed to
        ``obj.add_text_task`` as ``unique_key`` and ``url``. The ``name`` is
        the stripped anchor text with newlines replaced by ``"--"``.

        ``*args`` is accepted but not used.
        """
        listing_url = obj.sync_type.base_url.format(cat=kwargs.get("cat", ""))
        page = Internet.html_get(listing_url)

        # Candidate locations of the article links, tried in this order.
        candidates = (
            "/html/body/main/div[2]/div/div/div[1]/div/div[2]/div/article[*]/div/div[2]/a",
            "/html/body/main/div[2]/div/div/div[1]/div/article[*]/div/div[2]/a",
        )

        anchors = []
        for expression in candidates:
            anchors = page.html.xpath(expression)
            if anchors:
                break

        link_root = obj.sync_type.extras.get("base_url")

        for anchor in reversed(anchors):
            target = urljoin(link_root, anchor.attrs.get("href"))
            # Multi-line link text is flattened onto a single line.
            title = anchor.text.strip().replace("\n", "--")

            obj.add_text_task(
                unique_key=target,
                name=title,
                url=target,
                data={},
            )