Example #1
0
def get(base, dst):
    """Drive the listing -> thread -> page -> image pipeline for one section.

    ``base`` is a %-format template with a single placeholder that receives
    the listing page number; ``dst`` is copied unchanged into every work item
    handed to ``mapper_product``.

    Errors raised by the helpers are not caught here. Always returns ``True``
    when the pipeline runs to the end.
    """
    # Discover how many listing pages exist from the pager on page 1. Each
    # href is expected to end in "-<number>.<ext>"; anything else makes
    # int() raise.
    last_page = 1
    pager_links = t.exp(base % r'1').xpath(r'//*[@class = "pg"]//a/@href')
    for link in pager_links:
        last_page = max(last_page, int(link.split(r'.')[0].split(r'-')[-1]))

    # One work item per listing page.
    listing_jobs = [
        {r'dst': dst, r'url': base % page_no}
        for page_no in range(1, last_page + 1)
    ]

    def _merge_stage(work_items, mapper):
        # Run one pipeline stage and fold the dicts it yields into one dict.
        merged = {}
        for partial in t.reducer(work_items, mapper):
            merged.update(partial)
        return merged

    threads = _merge_stage(listing_jobs, mapper_product)
    pages = _merge_stage(threads.values(), mapper_pg)
    images = _merge_stage(pages.values(), mapper_img)

    # NOTE(review): the result of this last stage is discarded; this assumes
    # t.reducer runs eagerly rather than returning a lazy iterator - confirm.
    t.reducer(images.values(), mapper_get)
    return True
Example #2
0
def query_pg_images(url):
    """Collect the image URLs found under the ``adw``-classed element of a page.

    Each ``src`` attribute is passed through ``t.http_urljoin`` together with
    ``url`` (presumably resolving relative links - confirm against ``t``).

    Returns a list of URLs, or ``None`` when fetching or parsing fails.
    """
    try:
        return [
            t.http_urljoin(url, src)
            for src in t.exp(url).xpath(r'//*[@class = "adw"]//img/@src')
        ]
    except Exception:
        # Best-effort lookup: ordinary failures are reported as None. Unlike
        # the previous ``finally: return``, KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        return None
Example #3
0
def query_pgs(url):
    """Return ``url`` followed by the pager links found on that page.

    Only anchors without a ``class`` attribute inside the ``pg``-classed
    element are taken; each href is combined with ``url`` through
    ``t.http_urljoin``.

    Returns a list whose first item is ``url`` itself, or ``None`` when
    fetching or parsing fails.
    """
    try:
        pager_hrefs = t.exp(url).xpath(
            r'//*[@class = "pg"]//a[not(@class)]/@href')
        return [url] + [t.http_urljoin(url, href) for href in pager_hrefs]
    except Exception:
        # Best-effort lookup: ordinary failures are reported as None. Unlike
        # the previous ``finally: return``, KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        return None
Example #4
0
def query_threadpages(url):
    """Build the list of page URLs belonging to one thread.

    The highest number shown in the ``div#pages`` pager is read from the
    page at ``url``; pages 2..max are then addressed as ``"<n>.html"``
    combined with ``url`` through ``t.http_urljoin``.

    Returns a list starting with ``url`` itself, or ``None`` when fetching
    or parsing fails.
    """
    try:
        labels = t.exp(url).xpath(r'//div[@id = "pages"]/a/text()')
        # isdecimal() rather than isnumeric(): the latter also accepts
        # characters such as fractions or superscripts that int() rejects,
        # which would abort the whole lookup over one odd pager label.
        page_numbers = [int(label) for label in labels if label.isdecimal()]
        last_page = max([1] + page_numbers)

        return [url] + [
            t.http_urljoin(url, r'%s.html' % page_no)
            for page_no in range(2, last_page + 1)
        ]
    except Exception:
        # Best-effort lookup: ordinary failures are reported as None. Unlike
        # the previous ``finally: return``, KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        return None
Example #5
0
def query_product_page(url, dst):
    """Extract the thread records listed on one product page.

    For every ``group``-classed element, a record with the keys ``cover``,
    ``url``, ``subject`` and ``dst`` is built. ``subject`` is the cleaned
    link text followed by the digits of the thread URL in square brackets;
    ``dst`` is copied through unchanged.

    Returns a dict mapping thread URL to its record. Returns ``None`` when
    fetching or parsing fails, or as soon as any record has an empty field
    (the whole page is rejected, not just that record).
    """
    try:
        threadinfos = {}
        for group in t.exp(url, r'utf-8').xpath(r'//*[@class = "group"]'):
            # A group without a link raises IndexError, handled below.
            link = group.xpath(r'.//*[@class = "bution"]//a')[0]
            threadurl = t.http_urljoin(url, t.expa(link, r'href'))
            thread = {
                r'cover': t.exps(
                    group.xpath(r'.//*[@class = "photo"]//img/@src')),
                r'url': threadurl,
                r'subject': r'%s[%s]' % (
                    fixsubject(t.expt(link)), re.sub(r'\D', r'', threadurl)),
                r'dst': dst,
            }
            if not all(thread.values()):
                # One incomplete record invalidates the whole page.
                return None
            threadinfos[threadurl] = thread

        return threadinfos
    except Exception:
        # Best-effort lookup: ordinary failures are reported as None. Unlike
        # the previous ``finally: return``, KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        return None
Example #6
0
def query_threadpageimages(url):
    """Return the ``src`` values of images directly under ``div.content``.

    The values are returned exactly as the XPath query yields them; they are
    not joined with ``url``.

    Returns the XPath result, or ``None`` when fetching or parsing fails.
    """
    try:
        return t.exp(url).xpath(r'//div[@class = "content"]/img/@src')
    except Exception:
        # Best-effort lookup: ordinary failures are reported as None. Unlike
        # the previous ``finally: return``, KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        return None
Example #7
0
def query_navpage_threadobjtbl(url, err=None):
    """Return the ``<li>`` nodes under ``div.hezi/ul`` of a navigation page.

    ``err`` is forwarded unchanged to ``t.exp``. Returns ``None`` when
    ``t.exp`` yields ``None``.
    """
    page = t.exp(url, err=err)
    return None if page is None else page.xpath(r'//div[@class = "hezi"]/ul/li')
Example #8
0
def query_product_total(url, err=None):
    """Read the text of ``div.shoulushuliang/span`` from a page.

    The XPath result is passed through ``t.exps`` before being returned.
    ``err`` is forwarded unchanged to ``t.exp``. Returns ``None`` when
    ``t.exp`` yields ``None``.
    """
    page = t.exp(url, err=err)
    if page is not None:
        counter_text = page.xpath(
            r'//div[@class = "shoulushuliang"]/span/text()')
        return t.exps(counter_text)
    return None