Ejemplo n.º 1
0
 def __init__(self):
     """Initialise the instance with its default attributes.

     Runs the parent initialiser, stores the root host name, creates the
     empty ``catlist`` and ``faclist`` lists, and builds a ``webparser``
     with no arguments that is kept on ``self.parser``.
     """
     super(whutparser, self).__init__()
     # Host name kept as the root for this parser.
     self.root = "i.whut.edu.cn"
     # Both containers start out empty.
     self.faclist = []
     self.catlist = []
     self.parser = webparser()
Ejemplo n.º 2
0
 def parse(self, html, parse_type):
     """Parse ``html`` and add the result to ``self.dataList``.

     Args:
         html: HTML to be parsed; handed to ``webparser`` unchanged.
         parse_type: Parameter that controls how the html is parsed;
             handed to ``webparser`` unchanged.

     Returns:
         None. As a side effect, whatever the parser's ``parse()`` returns
         is added to ``self.dataList`` with ``+=``.
     """
     page_parser = webparser(html, parse_type)
     self.dataList += page_parser.parse()
Ejemplo n.º 3
0
def parseshoplistdirect(ws, url, delay=10):
    """Walk a paginated shop listing, parsing one page per iteration.

    Each page is fetched with ``ws.getpagebyurlwithheader``, passed to a
    ``webparser`` created with ``'taobao'`` as its first argument and
    processed with ``parsepage()``.  The URL of the following page is taken
    from ``wp.getnext()`` and the page total from
    ``wp.gettotalpagenumber()``.

    Args:
        ws: Object providing ``getpagebyurlwithheader(url)``.
        url: URL of the first page; ``None`` means there is nothing to do.
        delay: Seconds to sleep between two page fetches.  Defaults to 10,
            the value that was previously hard-coded.

    Returns:
        None.  A progress line is printed after every parsed page.
    """
    count = 1
    while url is not None:
        page = ws.getpagebyurlwithheader(url)
        wp = webparser('taobao', page)
        wp.parsepage()
        url = wp.getnext()
        # Parenthesised single-argument form prints the same text whether
        # ``print`` is a statement or a function.
        print("== Parse Page %d finished ==" % count)

        # gettotalpagenumber() yields a pair; only the total is used here.
        totalpage, _ = wp.gettotalpagenumber()

        # Leave before sleeping when this was the last page or there is no
        # next URL, so the final iteration never waits for nothing.
        if url is None or count >= totalpage:
            break
        count += 1
        time.sleep(delay)
Ejemplo n.º 4
0
def parseshoplistdirect(ws, url, delay=10):
    """Parse every page of a shop listing, starting at ``url``.

    For each page the HTML is obtained from ``ws.getpagebyurlwithheader``,
    wrapped in a ``webparser`` whose first argument is ``'taobao'`` and
    parsed via ``parsepage()``.  ``wp.getnext()`` supplies the next URL and
    ``wp.gettotalpagenumber()`` the number of pages.

    Args:
        ws: Object providing ``getpagebyurlwithheader(url)``.
        url: URL of the first page; with ``None`` the function returns
            immediately.
        delay: Seconds to wait between consecutive page fetches.  Defaults
            to 10, the value that was previously hard-coded.

    Returns:
        None.  A progress line is printed after every parsed page.
    """
    count = 1
    while url is not None:
        page = ws.getpagebyurlwithheader(url)
        wp = webparser('taobao', page)
        wp.parsepage()
        url = wp.getnext()
        # Parenthesised single-argument form prints the same text whether
        # ``print`` is a statement or a function.
        print("== Parse Page %d finished ==" % count)

        # gettotalpagenumber() yields a pair; only the total is used here.
        totalpage, _ = wp.gettotalpagenumber()

        # Stop without sleeping once the last page has been handled or no
        # further URL is available.
        if url is None or count >= totalpage:
            break
        count += 1
        time.sleep(delay)
Ejemplo n.º 5
0
def parseshoplistbybrowser(mb, url, max_pages=1, delay=10):
    """Parse shop listing pages through ``mb`` and collect their rate links.

    Each page is fetched with ``mb.getpagebyurl``, passed to a ``webparser``
    created with ``'taobao'`` as its first argument and processed with
    ``parsepage()``.  The parser's ``ratelinklist`` attribute is appended to
    the result after every page.

    The previous implementation ended its loop with an unconditional
    ``break``, so only the first page was ever parsed (sometimes after a
    useless 10 second sleep).  That one-page limit is now the explicit
    default of ``max_pages``, and no sleep happens before returning.

    Args:
        mb: Object providing ``getpagebyurl(url)``.
        url: URL of the first page; ``None`` yields an empty result.
        max_pages: Upper bound on the number of pages to parse.  Defaults to
            1 (the historical behaviour); ``None`` follows the pagination
            until the last page.  At least one page is parsed whenever
            ``url`` is not ``None``.
        delay: Seconds to sleep between two page fetches (default 10).

    Returns:
        list: One entry per parsed page, each being that page's
        ``wp.ratelinklist``.
    """
    ratelinklist = []
    count = 1
    while url is not None:
        page = mb.getpagebyurl(url)
        wp = webparser('taobao', page)
        wp.parsepage()
        url = wp.getnext()
        ratelinklist.append(wp.ratelinklist)
        # gettotalpagenumber() yields a pair; only the total is used here.
        totalpage, _ = wp.gettotalpagenumber()
        # Parenthesised single-argument form prints the same text whether
        # ``print`` is a statement or a function.
        print("== Parse Page %d finished ==" % count)

        reached_limit = max_pages is not None and count >= max_pages
        if url is None or count >= totalpage or reached_limit:
            break
        count += 1
        time.sleep(delay)

    return ratelinklist
Ejemplo n.º 6
0
def parseshoplistbybrowser(mb, url, max_pages=1, delay=10):
    """Collect the rate links of shop listing pages fetched through ``mb``.

    For each page the HTML comes from ``mb.getpagebyurl``, is wrapped in a
    ``webparser`` whose first argument is ``'taobao'`` and is parsed via
    ``parsepage()``; the parser's ``ratelinklist`` attribute is then appended
    to the result.

    Earlier, an unconditional ``break`` at the end of the loop body meant
    that only the first page was ever parsed, occasionally after a wasted
    10 second sleep.  The one-page limit is now the explicit default of
    ``max_pages`` and the function no longer sleeps before returning.

    Args:
        mb: Object providing ``getpagebyurl(url)``.
        url: URL of the first page; ``None`` yields an empty result.
        max_pages: Upper bound on the number of pages to parse.  Defaults to
            1 (the historical behaviour); ``None`` follows the pagination
            until the last page.  At least one page is parsed whenever
            ``url`` is not ``None``.
        delay: Seconds to sleep between two page fetches (default 10).

    Returns:
        list: One entry per parsed page, each being that page's
        ``wp.ratelinklist``.
    """
    ratelinklist = []
    count = 1
    while url is not None:
        page = mb.getpagebyurl(url)
        wp = webparser('taobao', page)
        wp.parsepage()
        url = wp.getnext()
        ratelinklist.append(wp.ratelinklist)
        # gettotalpagenumber() yields a pair; only the total is used here.
        totalpage, _ = wp.gettotalpagenumber()
        # Parenthesised single-argument form prints the same text whether
        # ``print`` is a statement or a function.
        print("== Parse Page %d finished ==" % count)

        reached_limit = max_pages is not None and count >= max_pages
        if url is None or count >= totalpage or reached_limit:
            break
        count += 1
        time.sleep(delay)

    return ratelinklist
Ejemplo n.º 7
0
def parseshopratedetailbybrowser(mb, url):
    """Fetch ``url`` through ``mb`` and run the rate-detail parse on it.

    The page returned by ``mb.getpagebyurl(url)`` is wrapped in a
    ``webparser`` created with ``'rate information'`` as its first argument;
    ``parseproductratedetail`` is then called with the parser's own ``soup``.
    Nothing is returned; a completion line is printed.
    """
    html = mb.getpagebyurl(url)
    rate_parser = webparser('rate information', html)
    rate_parser.parseproductratedetail(rate_parser.soup)
    # Parenthesised single-argument form prints the same text whether
    # ``print`` is a statement or a function.
    print("== Parse rate information finished ==")
Ejemplo n.º 8
0
def getshoplistbybrowser(mb, url):
    """Return what ``parseproductcat()`` yields for the page at ``url``.

    The page is fetched with ``mb.getpagebyurl(url)`` and wrapped in a
    ``webparser`` created with ``'taobao'`` as its first argument.
    """
    html = mb.getpagebyurl(url)
    return webparser('taobao', html).parseproductcat()
Ejemplo n.º 9
0
def parseshopratedetailbybrowser(mb, url):
    """Load ``url`` via ``mb`` and parse the rate details of that page.

    ``mb.getpagebyurl(url)`` supplies the page, which is given to a
    ``webparser`` together with ``'rate information'`` as the first
    argument.  The parser's ``parseproductratedetail`` is invoked on its own
    ``soup`` attribute.  Returns nothing and prints a completion line.
    """
    html = mb.getpagebyurl(url)
    rate_parser = webparser('rate information', html)
    rate_parser.parseproductratedetail(rate_parser.soup)
    # Parenthesised single-argument form prints the same text whether
    # ``print`` is a statement or a function.
    print("== Parse rate information finished ==")
Ejemplo n.º 10
0
def getshoplistbybrowser(mb, url):
    """Fetch ``url`` through ``mb`` and return the ``parseproductcat()`` result.

    The fetched page is handed to a ``webparser`` whose first argument is
    ``'taobao'``; the value of its ``parseproductcat()`` call is returned
    unchanged.
    """
    html = mb.getpagebyurl(url)
    return webparser('taobao', html).parseproductcat()