Example #1
def main(url):
    # webspider and mybrowser are helper classes defined elsewhere in this
    # module; note that ws is configured below but never used in this example
    ws = webspider()
    mb = mybrowser()

    # route both the spider and the emulated browser through the same
    # authenticated HTTP proxy
    ws.setProxy('10.235.96.250', 'axdsp', 'wel57come')
    mb.setProxy('10.235.96.250:8080', 'axdsp', 'wel57come')
    mb.setCookie()
    mb.setbrowseroptions()
    mb.setagent()

    linklist = []
    # get the shop category list for the start URL
    catlist = getshoplistbybrowser(mb, url)
    # parse the shop list category by category
    # (the break below means only the first category is processed)
    for item in catlist:
        linklist.append(parseshoplistbybrowser(mb, item))
        break

    # walk the collected rate links and parse each rate-detail page
    for item in linklist:
        for rate in item:
            for detail in rate:
                parseshopratedetailbybrowser(mb, detail)
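
None of the helper functions used above (getshoplistbybrowser, parseshoplistbybrowser, parseshopratedetailbybrowser) are shown on this page. As a rough illustration only, here is a minimal sketch of what getshoplistbybrowser might look like; the openurl() method name and the regex-based link extraction are assumptions, not part of the original code:

import re

class FakeBrowser(object):
    # stand-in for mybrowser, just enough to exercise the sketch
    def openurl(self, url):
        return '<a href="http://example.com/shop/1">shop one</a>'

def getshoplistbybrowser(mb, url):
    # hypothetical sketch: fetch the category page through the (proxied)
    # browser and return every anchor href as a candidate category link;
    # openurl() is an assumed method name, not confirmed by the example
    html = mb.openurl(url)
    return re.findall(r'<a[^>]+href="([^"]+)"', html)

print(getshoplistbybrowser(FakeBrowser(), 'http://example.com'))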
Example #2
import os
import json
import webspider

# (assumed context: savedir, countdir, and n_images are defined earlier in the
#  original script, and termlines yields one search term per line of an input
#  file; the listing as published starts mid-loop)
for eachline in termlines:
    term = eachline
    dirname = savedir + "/" + ("%04d" % countdir)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    # capturing web
    print "\nsearch: %s" % term.strip('\n')

    # record the search term for this directory in search_term.txt
    term_fname = dirname + "/" + 'search_term.txt'
    termfile = open(term_fname, 'w')
    print >> termfile, ("(%d, '%s')" % (countdir, term.strip('\n')))
    countdir += 1
    termfile.close()

    # query the spider for image results and parse the JSON response;
    # stray backslashes are escaped so json.loads() accepts the payload
    myspider = webspider.webspider()
    res = myspider.querythisword(term, n_images)
    jsonres = json.loads(res.replace("\\", "\\\\"))
    allreturn = []
    allreturn.extend(jsonres['responseData']['results'])

    # download the returned images one by one
    countfile = 0
    allurls = []
    allfnames = []
    for i, aret in enumerate(allreturn):
        filename = dirname + "/" + ("%04d" % countfile) + ".jpg"
        countfile += 1
        print "save to: " + filename
        print "downloading: " + aret["tbUrl"]