def main(url):
    """Crawl the shop catalogue starting at *url* and fetch its rate details.

    Sets up a spider and a browser wrapper behind the corporate proxy,
    retrieves the category list for the start URL, parses the shop list of
    the first category only, then visits every rate-detail link found.
    """
    # Instantiate the raw spider first, then the browser wrapper, and route
    # both through the proxy (same order/side effects as before).
    spider = webspider()
    browser = mybrowser()
    spider.setProxy('10.235.96.250', 'axdsp', 'wel57come')
    browser.setProxy('10.235.96.250:8080', 'axdsp', 'wel57come')
    browser.setCookie()
    browser.setbrowseroptions()
    browser.setagent()

    # Fetch the shop category list for the start URL.
    categories = getshoplistbybrowser(browser, url)

    # Parse the shop list of the FIRST category only — the loop exits after
    # one iteration (NOTE(review): looks like a deliberate limit or a debug
    # leftover; preserved as-is).
    shop_lists = []
    for category in categories:
        shop_lists.append(parseshoplistbybrowser(browser, category))
        break

    # Walk every rate link of every parsed shop and fetch its detail page.
    for shops in shop_lists:
        for rates in shops:
            for rate_link in rates:
                parseshopratedetailbybrowser(browser, rate_link)
# NOTE(review): fragment of a larger per-search-term loop — `eachline`,
# `savedir`, `countdir` and `n_images` are bound by the enclosing scope,
# which is not visible here; the trailing `for` loop also continues past
# this fragment. Python 2 syntax (print statements).
term = eachline
# One zero-padded, numbered output directory per search term.
dirname = savedir + "/" + ("%04d" % countdir)
if not os.path.exists(dirname):
    os.makedirs(dirname)
#capturing web
#print u'capturing web'
print "\nsearch: %s" % term.strip('\n')
#write termfile:
# Record the (index, term) pair so downloaded images can be mapped back
# to the query that produced them.
term_fname = dirname + "/" + 'search_term.txt'
termfile = open(term_fname,'w')
print>>termfile,("(%d, \'%s\')" % (countdir,term.strip('\n')))
countdir += 1
termfile.close()
# Query the project spider for up to n_images results for this term.
myspider = webspider.webspider()
res = myspider.querythisword(term, n_images)
# Double every backslash so lone escapes in the raw response still parse
# as valid JSON; the payload carries results under responseData.results.
jsonres = json.loads(res.replace("\\","\\\\"))
allreturn = []
allreturn.extend(jsonres['responseData']['results'])
#download images
# Emit one zero-padded .jpg filename per result; the actual download of
# aret["tbUrl"] (thumbnail URL) presumably follows beyond this fragment.
countfile = 0
allurls = []
allfnames = []
for i,aret in enumerate(allreturn):
    #print aret
    filename = dirname + "/" + ("%04d" % countfile) + ".jpg"
    countfile += 1
    print "save to: " + filename
    print "downloading: " + aret["tbUrl"]