forked from eight04/ComicCrawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cc_sfacg.py
51 lines (38 loc) · 1.09 KB
/
cc_sfacg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#! python3
"""this is sfacg module for comiccrawler
Example URL:
http://comic.sfacg.com/HTML/PFSLL/
"""
import re
import comiccrawler
from safeprint import safeprint
# Module-level settings for this site module.
# NOTE(review): presumably read by comiccrawler when it loads the module
# (extra request headers, handled host names, short display name) -- confirm
# against the loader.
header = dict()
domain = ["comic.sfacg.com"]
name = "SF"
def gettitle(html, **kw):
    """Return the comic title taken from the page's ``<title>`` element.

    Newlines are stripped from ``html`` first so a title that spans several
    lines still matches. Only the text before the first comma of the title
    is returned. Extra keyword arguments are accepted and ignored.

    Raises:
        AttributeError: if ``html`` contains no non-empty ``<title>`` element.
    """
    flattened = "".join(html.split("\n"))
    full_title = re.search("<title>(.+?)</title>", flattened).group(1)
    head, _, _ = full_title.partition(",")
    return head
def getepisodelist(html, url=""):
    """Build the list of episodes found on a comic's index page.

    Every ``<li><a href="..." target="_blank">...</a></li>`` entry in
    ``html`` becomes one ``comiccrawler.Episode`` whose ``title`` is the link
    text with any nested tags removed and whose ``firstpageurl`` is the
    scheme-and-host prefix of ``url`` followed by the link's ``href``.

    The raw match list is passed to ``safeprint`` before the episodes are
    built.

    Returns:
        The episodes in the reverse of the order they appear in ``html``.

    Raises:
        AttributeError: if ``url`` contains no ``http(s)://host`` prefix
            (this includes the default empty string).
    """
    link_pattern = "<li><a href=\"(.+?)\" target=\"_blank\">(.+?)</a></li>"
    matches = re.findall(link_pattern, html, re.M)
    site_root = re.search("(https?://[^/]+)", url).group(1)
    safeprint(matches)

    episodes = []
    for href, label in matches:
        episode = comiccrawler.Episode()
        # Link text may contain markup; keep only the plain text.
        episode.title = re.sub("<.+?>", "", label)
        episode.firstpageurl = site_root + href
        episodes.append(episode)

    episodes.reverse()
    return episodes
def getimgurls(html, page=0, url=""):
    """Return the image URLs for an episode page.

    The episode page references a script under ``/Utility/``. That script is
    downloaded with ``comiccrawler.grabhtml`` and every
    ``picAy[n] = "..."`` assignment in it is collected as an image path.

    Args:
        html: Source of the episode page.
        page: Not used by this function; kept so the signature is unchanged
            for callers.
        url: Address of the episode page. Its scheme-and-host prefix is
            prepended to the script path and to every image path.

    Returns:
        A list of image URLs, in the order they occur in the script.

    Raises:
        AttributeError: if ``html`` has no ``/Utility/*.js`` script reference
            or ``url`` has no ``http(s)://host`` prefix.
    """
    # Raw strings keep the regex escapes (\. \[ \d) from being treated as
    # invalid string escape sequences.
    script_path = re.search(r'src="(/Utility/.+?\.js)"', html).group(1)
    base = re.search(r"(https?://[^/]+)", url).group(1)
    script = comiccrawler.grabhtml(base + script_path)
    image_paths = re.findall(r'picAy\[\d+\] = "(.+?)"', script)
    return [base + path for path in image_paths]
def errorhandler(er, ep):
    """Do nothing; both arguments are ignored and ``None`` is returned."""
    return None