def TaskStarter(): client = redis.Redis(host=setting.REDIS_SERVER, port=setting.REDIS_PORT, password=setting.REDIS_PW, db=0) taskurl = client.lpop(setting.REDIS_TASKQUEUE) current = client.get(setting.REDIS_TASK_CURRENT) if taskurl == None and current == None: log.info(setting.start_urls) print setting.start_urls # start Redis try: html = posthtml(setting.start_urls, {"top": 30}) except Exception, e: print e log.warning(e) raise e jsondata = json.loads(html) for node in jsondata: client.lpush(setting.REDIS_TASKQUEUE, node["CM_Keywords"])
url = "http://www.superimagemarket.com/WebServices/KeyWordsImage.asmx/GetImagesByKeyWords" data = {} data["pageSize"] = 99 data["pageIndex"] = 1 data["KeyWords"] = taskurl data["People"] = 0 data["Color"] = 0 data["Formats"] = 0 data["MediaType"] = 0 data["Price"] = 0 data["Rating"] = 0 data["Licence"] = 0 data["fldSort"] = "Best Match" data["Order"] = 1 # print data jsondata = posthtml(url, data) except Exception, e: print e log.warning(e) continue # get cate page number CODEC = "UTF-8" # print pageallnum decodedata = json.loads(jsondata) pageallnum = int(int(decodedata["count"]) / 99) + 1 # produce each page url by task url current = client.get(setting.REDIS_TASK_CURRENT) if current == None: current = 1