Example #1
def main():
    try:
        # set page for "The Rise of Red Skull" (slug "trors") on es.marvelcdb.com
        URL = "https://es.marvelcdb.com/set/" + "trors"
        html = zipper.get_html(URL)
        scrapeMain(html, "TheRiseOfRedSkull")
    except Exception as e:
        print(e)
Example #2
def obtainIssueImages(co_url: str):
   co_html: BeautifulSoup = zipper.get_html(co_url)
   # first try: extract the image URLs from the page text
   images_to_do = obtainIssueImagesByText(co_html)
   # fallback kept for reference: parse the HTML tree instead
   #if 0 >= len(images_to_do):
      #obtainIssueImagesByHtml(co_html)
   return images_to_do
Example #3
def scrapeIssue(url: str, dir: str):
    imgs = zipper.get_html(url).findAll(
        'img', src=True, attrs={'class': 'img-responsive img-vertical-card'})
    imgs_count = 0
    for img in imgs:
        # name each file from the global card counter
        title = "card-{}.jpg".format(card_idx)
        img_url = img['src']
        # turn relative paths into absolute URLs
        if not img_url.startswith('http'):
            img_url = "https://es.marvelcdb.com" + img_url
        print(" " + title + ": " + img_url)
        zipper.get_image(img_url, dir + "/" + title)
        imgs_count += 1
        inc_card_idx()
    print(url + " - images processed = {}".format(imgs_count))
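The snippet above relies on a module-level card counter that is not shown. A minimal sketch of how `card_idx` and `inc_card_idx()` could look (the names come from the code above; the bodies are an assumption):

# hypothetical module-level counter assumed by scrapeIssue
card_idx = 0

def inc_card_idx():
    # advance the shared counter so card file names stay unique across sets
    global card_idx
    card_idx += 1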
Example #4
def main():
   try:
      # comic to scrape
      title: str = 'Conan-the-Barbarian-1970'
      # destination directory: sanitize the title for use as a folder name
      dir = title.replace(':', '_').replace(',', '_').replace(' ', '_').replace('(', '').replace(')', '')
      if not os.path.exists(dir):
         os.makedirs(dir)
      # scrape all comic items
      html = zipper.get_html(URL + "/Comic/" + title)
      scrapeComic(html, dir)
      # zip the result directory with a .cbz extension
      zipper.zip(dir, dir + '.cbz')
   except Exception as e:
      print(e)
Example #5
def main():
    try:
        URL = 'https://readcomiconline.to/Comic/'
        title = 'The-Savage-Sword-Of-Conan'
        html = zipper.get_html(URL + title)
        # destination directory: sanitize the title for use as a folder name
        dir = title.replace(':', '_').replace(',', '_').replace(' ', '_').replace('(', '').replace(')', '')
        if not os.path.exists(dir):
            os.makedirs(dir)

        issue_idx = 96
        scrapeMain(html, dir, issue_idx)

        zipper.zip(dir, dir + '.cbz')
        # kept for reference: scrape a single TPB issue directly
        #scrapeIssue('TPB', 'https://readcomiconline.to/Comic/Marvel-Zombies-2006/TPB?id=158840', 'Marvel_Zombies_(2006)/issue-TPB')
    except Exception as e:
        print(e)
Example #6
def scrapeIssue(co_title: str, co_url: str, co_dir: str):
    print(" " + co_title + ": " + co_url)
    if not os.path.exists(co_dir):
        os.makedirs(co_dir)
    co_html = zipper.get_html(co_url).prettify()
    img_idx = 0
    # the issue's image URLs are embedded in inline JS as lstImages.push("...") calls
    html_idx: int = co_html.find('lstImages.push')
    while -1 != html_idx:
        co_html = co_html[html_idx:]
        img_url = co_html[:co_html.find('");')]
        img_url = img_url.replace('lstImages.push("', '').replace('")', '')
        img_title = "img-" + str(img_idx).zfill(3) + ".jpg"
        print(img_title + ": " + img_url)
        zipper.get_image(img_url, co_dir + "/" + img_title)
        # advance past this push() call and look for the next one
        img_idx = img_idx + 1
        co_html = co_html[co_html.find(';'):]
        html_idx = co_html.find('lstImages.push')
        # throttle requests between images
        time.sleep(5)
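All of the examples call into a small `zipper` helper module that is not shown here. A minimal sketch of what such a module could look like, assuming it wraps `requests`, BeautifulSoup, and `zipfile` (the function names match the snippets above; the bodies are an assumption):

# hypothetical zipper.py: the helpers the examples above rely on
import os
import zipfile

import requests
from bs4 import BeautifulSoup


def get_html(url: str) -> BeautifulSoup:
    # fetch a page and parse it into a BeautifulSoup tree
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def get_image(url: str, path: str):
    # download a single image to the given file path
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    with open(path, 'wb') as f:
        f.write(response.content)


def zip(dir: str, archive: str):
    # pack every file in dir into a .cbz (plain zip) archive
    with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as zf:
        for name in sorted(os.listdir(dir)):
            zf.write(os.path.join(dir, name), arcname=name)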